In [1]:
import pandas as pd
import numpy as np
import missingno as ms
import matplotlib.pyplot as plt
import copy
import re
import seaborn as sns
import string
import warnings
warnings.filterwarnings("ignore")
In [2]:
#Read all the training dataframes
Maruti=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Maruti.csv")
Volvo=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Volvo.csv")
Volkswagen=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Volkswagen.csv")
Toyota=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Toyota.csv")
Tata=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Tata.csv")
Skoda=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Skoda.csv")
Rolls_Royce=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Rolls-Royce.csv")
Renault=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Renault.csv")
Porsche=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Porsche.csv")
Nissan=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Nissan.csv")
Mitsubishi=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Mitsubishi.csv")
Mini=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Mini.csv")
MG=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//MG.csv")
Mercedes_Benz=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Mercedes-Benz.csv")
Maserati=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Maserati.csv")
Aston_Martin=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Aston_Martin.csv")
Mahindra=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Mahindra.csv")
Lexus=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Lexus.csv")
Land_Rover=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Land_Rover.csv")
Lamborghini=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Lamborghini.csv")
Kia=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Kia.csv")
Jeep=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Jeep.csv")
Jaguar=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Maruti.csv")
Isuzu=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Isuzu.csv")
Hyundai=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Hyundai.csv")
Honda=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Honda.csv")
Ferrari=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Ferrari.csv")
Force=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Force.csv")
Bugatti=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Bugatti.csv")
BMW=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//BMW.csv")
Bentley=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Bentley.csv")
Audi=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Audi.csv")
Ford=pd.read_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//DataFrames//Train_data//Ford.csv")
In [3]:
#combine all the dataframes
main_data=pd.concat([Maruti,Ford,Audi,Bentley,BMW,Bugatti,Force,Ferrari,Honda,Hyundai,Isuzu,Jaguar,Jeep,Kia,Lamborghini,Land_Rover,Lexus,Mahindra,Aston_Martin,Maserati,Mercedes_Benz,MG,Mini,Mitsubishi,Nissan,Porsche,Renault,Rolls_Royce,Skoda,Tata,Toyota,Volkswagen,Volvo],axis=0)
In [7]:
main_data.head(5)
Out[7]:
| Unnamed: 0 | Unnamed: 0.1 | Model | Brand | Varient | ARAI Mileage | Engine Displacement (cc) | Max Power (bhp@rpm) | Seating Capacity | Boot Space (Litres) | ... | Ground Clearance Unladen (mm) | Electric Fuel Tank Capacity (Litres) | Motor Power | Diesel Mileage (WLTP) | Rear Legroom (mm) | Front Seat Base Length | Rear Seat Base Length | City driveability (20-50kmph) | Petrol Overall Mileage | Acceleration 0-60kmph | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5728 | 0 | Maruti | Dzire | Dzire ZXI Plus | 23.26 kmpl | 1197.0 | 88.50bhp@6000rpm | 5.0 | 378.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 13973 | 0 | Maruti | Eeco | Eeco 5 Seater AC | 16.11 kmpl | 1196.0 | 72.41bhp@6000rpm | 5.0 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 15607 | 0 | Maruti | S-Cross | S-Cross Zeta | 18.55 kmpl | 1462.0 | 103.25bhp@6000rpm | 5.0 | 375.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 5199 | 0 | Maruti | Dzire | Dzire ZXI Plus AT | 24.12 kmpl | 1197.0 | 88.50bhp@6000rpm | 5.0 | 378.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 13671 | 0 | Maruti | S-Presso | S-Presso LXi | 21.4 kmpl | 998.0 | 65.71bhp@5500rpm | 5.0 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 299 columns
In [5]:
main_data.shape
Out[5]:
(160031, 299)
In [6]:
main_data.columns
Out[6]:
Index(['Unnamed: 0', 'Unnamed: 0.1', 'Model', 'Brand', 'Varient',
'ARAI Mileage', 'Engine Displacement (cc)', 'Max Power (bhp@rpm)',
'Seating Capacity', 'Boot Space (Litres)',
...
'Ground Clearance Unladen (mm)', 'Electric Fuel Tank Capacity (Litres)',
'Motor Power', 'Diesel Mileage (WLTP)', 'Rear Legroom (mm)',
'Front Seat Base Length', 'Rear Seat Base Length',
'City driveability (20-50kmph)', 'Petrol Overall Mileage',
'Acceleration 0-60kmph'],
dtype='object', length=299)
In [8]:
main_data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 160031 entries, 0 to 549 Columns: 299 entries, Unnamed: 0 to Acceleration 0-60kmph dtypes: float64(33), int64(2), object(264) memory usage: 366.3+ MB
In [9]:
pd.set_option("display.max_rows",4)
# pd.set_option("display.max_columns",None)
In [10]:
main_data.describe()
Out[10]:
| Unnamed: 0 | Unnamed: 0.1 | Engine Displacement (cc) | Seating Capacity | No. of cylinder | Fuel Tank Capacity | Displacement (cc) | Valves Per Cylinder | Petrol Mileage (ARAI) | Petrol Fuel Tank Capacity (Litres) | ... | Front Headroom (mm) | Petrol Mileage (WLTP) | Front Legroom | Diesel Overall Mileage | Petrol City Mileage | Ground Clearance Unladen (mm) | Electric Fuel Tank Capacity (Litres) | Diesel Mileage (WLTP) | Rear Legroom (mm) | Petrol Overall Mileage | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 160031.000000 | 160031.000000 | 148820.000000 | 153784.00000 | 156553.000000 | 140439.000000 | 156553.000000 | 156553.000000 | 89098.000000 | 100865.00000 | ... | 1310.000000 | 2599.000000 | 462.000000 | 92.00 | 279.000000 | 14544.000000 | 192.000000 | 95.00 | 923.000000 | 846.000000 |
| mean | 10307.968756 | 1764.852685 | 1697.936447 | 5.15088 | 4.070832 | 48.566559 | 1745.041181 | 3.857428 | 17.771625 | 47.70058 | ... | 1034.641985 | 8.474282 | 441.246753 | 17.09 | 16.102366 | 193.647071 | 39.947917 | 6.99 | 406.990249 | 13.836525 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75% | 15364.500000 | 0.000000 | 1956.000000 | 5.00000 | 4.000000 | 52.000000 | 1991.000000 | 4.000000 | 20.270000 | 50.00000 | ... | 1045.000000 | 10.600000 | 348.000000 | 17.09 | 20.240000 | 209.000000 | 45.000000 | 6.99 | 360.000000 | 17.000000 |
| max | 49265.000000 | 24149.000000 | 7993.000000 | 8.00000 | 16.000000 | 100.000000 | 7993.000000 | 8.000000 | 39.530000 | 100.00000 | ... | 1059.000000 | 16.390000 | 944.000000 | 17.09 | 20.240000 | 238.000000 | 45.000000 | 6.99 | 911.000000 | 29.400000 |
8 rows × 35 columns
Preprocessing¶
In [11]:
main=copy.deepcopy(main_data)
In [12]:
def calculate_missing_percentage(df):
percentages=[]
feature=[]
for i in df.columns:
no_of_missing=df["{}".format(i)].isnull().sum()
precent=(no_of_missing/df.shape[0])
percentages.append(precent)
feature.append(i)
return pd.DataFrame({
"Fearure":feature,
"Percentage":percentages
})
In [13]:
def remove_higher_percentage_missingcolumns(df,columns_name,threshold):
no_of_missing=df["{}".format(columns_name)].isnull().sum()
percent=(no_of_missing/df.shape[0])
if(percent>=threshold):
df.drop(["{}".format(columns_name)],axis=1,inplace=True)
return "Successfully applied"
In [14]:
calculate_missing_percentage(main)
Out[14]:
| Fearure | Percentage | |
|---|---|---|
| 0 | Unnamed: 0 | 0.000000 |
| 1 | Unnamed: 0.1 | 0.000000 |
| ... | ... | ... |
| 297 | Petrol Overall Mileage | 0.994714 |
| 298 | Acceleration 0-60kmph | 0.997625 |
299 rows × 2 columns
In [15]:
pd.Series(main.columns).apply(lambda x:remove_higher_percentage_missingcolumns(main,x,0.50))
Out[15]:
0 Successfully applied
1 Successfully applied
...
297 Successfully applied
298 Successfully applied
Length: 299, dtype: object
In [16]:
main.columns
Out[16]:
Index(['Unnamed: 0', 'Unnamed: 0.1', 'Model', 'Brand', 'Varient',
'ARAI Mileage', 'Engine Displacement (cc)', 'Max Power (bhp@rpm)',
'Seating Capacity', 'Boot Space (Litres)',
...
'Cup Holders-Front', 'Leather Seats', 'Driving Experience Control Eco',
'Ventilated Seats', 'Tyre Pressure Monitor', 'Rain Sensing Wiper',
'Turbo Charger', 'Air Quality Control', 'Traction Control',
'Vehicle Stability Control System'],
dtype='object', length=153)
In [17]:
main.drop(["Unnamed: 0","Unnamed: 0.1"],axis=1,inplace=True)
In [18]:
def target_preprocess(value):
split_values=value.split("+")
total=0
for i in split_values:
try:
price=int("".join(re.findall("[0-9]",i)))
total=total+price
except:
pass
return total
In [12]:
#start with Target columns
main["Price"]
Out[12]:
0 9,69,443*
1 5,58,668*+Rs.11,579
2 11,51,181*+Rs.40,166
3 10,63,940*+nan
4 5,74,208*+nan
...
545 58,80,255*
546 53,01,124*
547 73,34,649*
548 53,01,124*+nan
549 53,01,124*+nan
Name: Price, Length: 160031, dtype: object
In [13]:
main["Price"].unique()
Out[13]:
array(['9,69,443*', '5,58,668*+Rs.11,579', '11,51,181*+Rs.40,166', ...,
'1,19,03,278*', '51,12,533*+nan', '1,09,26,269*'], dtype=object)
In [14]:
main["Price"]=main["Price"].apply(target_preprocess)
In [15]:
main["Price"]
Out[15]:
0 969443
1 570247
2 1191347
3 1063940
4 574208
...
545 5880255
546 5301124
547 7334649
548 5301124
549 5301124
Name: Price, Length: 160031, dtype: int64
In [16]:
#Take Next columns
main["Model"].unique()
Out[16]:
array(['Maruti', 'Ford', 'Audi', 'Bentley', 'BMW', 'Bugatti', 'Force',
'Ferrari', 'Honda', 'Hyundai', 'Isuzu', 'Jeep', 'Kia',
'Lamborghini', 'Land_Rover', 'Lexus', 'Mahindra', 'Aston_Martin',
'Maserati', 'Mercedes-Benz', 'MG', 'Mini', 'Mitsubishi', 'Nissan',
'Porsche', 'Renault', 'Rolls-Royce', 'Skoda', 'Tata', 'Toyota',
'Volkswagen', 'Volvo'], dtype=object)
In [17]:
plt.figure(figsize=(30,10))
sns.countplot(main["Model"])
Out[17]:
<AxesSubplot:xlabel='Model', ylabel='count'>
In [18]:
main["Brand"].unique()
Out[18]:
array(['Dzire', 'Eeco', 'S-Cross', 'S-Presso', 'Celerio', 'Ciaz', 'Swift',
'Ignis', 'Alto 800', 'XL6', 'Baleno', 'Swift Dzire Tour',
'Super Carry', 'Ertiga', 'Alto K10', 'Brezza', 'Wagon R',
'Alto 800 tour', 'Ecosport 2015-2021', 'Figo 2010 2012', 'Fusion',
'Ikon', 'Figo', 'Fiesta 2008-2011', 'Endeavour 2015-2020',
'Aspire', 'Figo 2012-2015', 'Fiesta 2011-2013', 'Fiesta 2004-2008',
'Figo 2015-2019', 'Freestyle', 'Mustang', 'Endeavour',
'Endeavour 2014-2015', 'EcoSport', 'Escort', 'Fiesta',
'Endeavour 2009-2014', 'Falcon', 'Endeavour 2007-2009',
'Mondeo 2001-2006', 'Endeavour 2003-2007', 'Q7', 'RS e-tron GT',
'Q2', 'Q3', 'A6', 'RS5', 'A4', 'e-tron', 'A8L', 'Q5', 'RS Q8',
'Q8', 'e-tron GT', 'RS7', 'S5 Sportback', 'Continental',
'Bentayga', 'Flying Spur', 'X5', 'M2', 'Z4', 'X1', '3 Series',
'2 Series', '6 Series', 'X7', 'iX', 'X3', 'M5', '7 Series',
'8 Series', '5 Series', 'X5 M', 'i4', 'X6', 'X3 M',
'M4 Competition', 'Divo', 'Veyron', 'Gurkha', 'Portofino',
'296 GTB', 'F8 Tributo', 'Roma', '812', 'SF90 Stradale', 'Jazz',
'City', 'WR-V', 'Amaze', 'City Hybrid', 'City 4th Generation',
'Creta', 'Aura', 'Santro', 'Venue', 'i20', 'Venue N Line',
'Tucson', 'Alcazar', 'Grand i10 Nios', 'Verna', 'i20 N Line',
'MU-X', 'D-Max', 'Compass', 'Meridian', 'Wrangler',
'Compass Trailhawk', 'Carens', 'Seltos', 'Sonet', 'Carnival',
'EV6', 'Huracan EVO', 'Aventador', 'Urus', 'Defender', 'Discovery',
'Discovery Sport', 'NX', 'LC 500h', 'LS', 'RX', 'ES', 'LX',
'XUV300', 'Scorpio-N', 'Alturas G4', 'Bolero', 'XUV700',
'KUV 100 NXT', 'Thar', 'Bolero Neo', 'Bolero Camper', 'E Verito',
'Scorpio Classic', 'Marazzo', 'Vantage', 'DB11', 'DBX', 'Levante',
'Quattroporte', 'Ghibli', 'GranCabrio', 'GLE', 'E-Class',
'AMG A 45 S', 'EQC', 'AMG C 63', 'C-Class', 'Maybach S-Class',
'GLC Coupe', 'AMG E 63', 'AMG GLE 63 S', 'AMG A 35', 'AMG E 53',
'A-Class Limousine', 'AMG GLA 35', 'S-Class', 'AMG GLE 53',
'AMG G 63', 'GLS', 'V-Class', 'CLS', 'AMG C 43', 'GLC', 'AMG GT',
'AMG GLC 43', 'G-Class', 'GLA', 'AMG GT 4-Door Coupe', 'EQS',
'Hector', 'Hector Plus', 'Gloster', 'Astor', 'ZS EV',
'Cooper 3 DOOR', 'John cooper Works', 'Cooper Convertible',
'Cooper SE', 'Cooper Countryman', 'Lancer', 'Outlander 2007-2013',
'Pajero 2002-2012', 'Montero 2007-2012', 'Cedia', 'FTO',
'Lancer Evolution X', 'Montero 2009-2014', 'Challenger',
'Pajero Sport', 'Outlander', 'Montero', 'Magnite', 'Kicks', 'GT-R',
'Panamera', '911', 'Taycan', 'Cayenne Coupe', '718', 'Cayenne',
'Macan', 'Kiger', 'Triber', 'KWID', 'Phantom', 'Ghost',
'Rolls Royce Dawn', 'Cullinan', 'Slavia', 'Kodiaq', 'Kushaq',
'Octavia', 'Superb', 'Harrier', 'Nexon', 'Tiago', 'Punch',
'Altroz', 'Tiago NRG', 'Safari', 'Nexon EV Max', 'Tigor EV',
'Yodha Pickup', 'Tigor', 'Nexon EV Prime', 'Fortuner', 'Hilux',
'Innova Crysta', 'Glanza', 'Urban Cruiser Hyryder', 'Camry',
'Vellfire', 'Taigun', 'Virtus', 'Vento', 'Tiguan', 'XC90', 'S90',
'S60', 'XC40', 'XC60', 'XC40 Recharge'], dtype=object)
In [19]:
main["Varient"].unique()
Out[19]:
array(['Dzire ZXI Plus', 'Eeco 5 Seater AC', 'S-Cross Zeta', ...,
'XC40 B4 Ultimate', 'XC60 B5 Inscripition', 'XC40 Recharge P8 AWD'],
dtype=object)
In [4]:
def remove_brand_name(brand,varient):
var=varient.replace(brand,"")
if var[0]==" ":
var=var[1:]
return var
In [5]:
main["Varient"]=main.apply(lambda x:remove_brand_name(x["Brand"],x["Varient"]),axis=1)
In [6]:
main["Varient"].unique()
Out[6]:
array(['220d M Sport', '220i Sport', '220i M Sport', ...,
'N8 iMT Dual tone', 'eDrive40', 'xDrive40'], dtype=object)
In [20]:
def ARAI_Mileage_preprocess(text):
text=str(text)
if text=="nan":
return np.nan
varient="".join(re.findall("[a-zA-Z\/]",text))
if varient=="kmpl":
text=float(text.replace(varient,""))
elif varient=="km/kg":
text=float(text.replace(varient,""))*1.40
return text
In [21]:
#This feature is in object type we need to convert it into float
main["ARAI Mileage"]
Out[21]:
0 23.26 kmpl
1 16.11 kmpl
2 18.55 kmpl
3 24.12 kmpl
4 21.4 kmpl
...
545 NaN
546 NaN
547 NaN
548 NaN
549 NaN
Name: ARAI Mileage, Length: 160031, dtype: object
In [22]:
main["ARAI Mileage"].unique()
Out[22]:
array(['23.26 kmpl', '16.11 kmpl', '18.55 kmpl', '24.12 kmpl',
'21.4 kmpl', '26.0 kmpl', '20.04 kmpl', '23.76 kmpl', '20.89 kmpl',
'31.59 km/kg', '20.97 kmpl', '22.94 kmpl', '26.55 km/kg', nan,
'23.2 kmpl', '26.11 km/kg', '22.35 kmpl', '19.95 kmpl',
'18.43 kmpl', '20.3 kmpl', '25.24 kmpl', '20.51 kmpl',
'24.39 kmpl', '21.7 kmpl', '19.89 kmpl', '34.05 km/kg',
'20.65 kmpl', '35.6 km/kg', '20.27 kmpl', '22.05 kmpl',
'19.8 kmpl', '30.9 km/kg', '20.88 km/kg', '24.97 kmpl',
'23.56 kmpl', '24.35 kmpl', '24.43 kmpl', '26.68 kmpl',
'20.15 kmpl', '25.19 kmpl', '24.9 kmpl', '23.0 kmpl', '15.6 kmpl',
'17.7 kmpl', '14.2 kmpl', '18.5 kmpl', '20.0 kmpl', '15.3 kmpl',
'13.8 kmpl', '26.1 kmpl', '23.5 kmpl', '25.83 kmpl', '16.6 kmpl',
'20.4 kmpl', '17.8 kmpl', '13.6 kmpl', '24.29 kmpl', '15.9 kmpl',
'19.0 kmpl', '13.0 kmpl', '13.9 kmpl', '23.8 kmpl', '13.1 kmpl',
'25.5 kmpl', '11.4 kmpl', '18.16 kmpl', '14.2 km/kg', '24.4 kmpl',
'17.0 kmpl', '12.8 kmpl', '25.01 kmpl', '13.5 kmpl', '16.86 kmpl',
'14.0 kmpl', '12.62 kmpl', '18.12 kmpl', '20.4 km/kg', '10.9 kmpl',
'17.01 kmpl', '9.0 kmpl', '22.77 kmpl', '19.4 kmpl', '18.1 kmpl',
'16.0 kmpl', '12.4 kmpl', '16.97 kmpl', '16.3 kmpl', '14.7 kmpl',
'10.91 kmpl', '11.21 kmpl', '14.11 kmpl', '8.8 kmpl', '13.47 kmpl',
'9.8 kmpl', '8.9 kmpl', '12.5 kmpl', '12.9 kmpl', '10.2 kmpl',
'10.1 kmpl', '11.24 kmpl', '10.63 kmpl', '14.37 kmpl',
'19.62 kmpl', '14.82 kmpl', '16.13 kmpl', '20.37 kmpl',
'13.32 kmpl', '13.38 kmpl', '11.86 kmpl', '12.04 kmpl',
'13.17 kmpl', '11.29 kmpl', '9.12 kmpl', '5.59 kmpl', '17.42 kmpl',
'18.65 kmpl', '39.53 kmpl', '8.29 kmpl', '18.64 kmpl',
'10.31 kmpl', '11.3 kmpl', '10.54 kmpl', '17.09 kmpl',
'16.55 kmpl', '17.66 kmpl', '6.8 kmpl', '17.1 kmpl', '18.4 kmpl',
'23.7 kmpl', '24.7 kmpl', '18.6 kmpl', '16.5 kmpl', '26.5 kmpl',
'17.4 kmpl', '24.1 kmpl', '20.1 kmpl', '25.0 kmpl', '21.0 kmpl',
'20.5 kmpl', '18.45 kmpl', '20.28 kmpl', '14.5 kmpl', '19.65 kmpl',
'20.7 kmpl', '16.8 kmpl', '21.3 kmpl', '20.25 kmpl', '30.48 km/kg',
'28.0 km/kg', '19.2 kmpl', '12.31 kmpl', '16.56 kmpl', '14.1 kmpl',
'15.7 kmpl', '16.2 kmpl', '12.1 kmpl', '14.3 kmpl', '14.9 kmpl',
'17.3 kmpl', '18.3 kmpl', '18.2 kmpl', '20.8 kmpl', '18.0 kmpl',
'7.25 kmpl', '7.04 kmpl', '7.69 kmpl', '7.87 kmpl', '7.3 kmpl',
'12.3 kmpl', '15.4 kmpl', '18.8 kmpl', '22.37 kmpl', '6.9 kmpl',
'12.05 kmpl', '18.15 kmpl', '17.29 kmpl', '15.2 kmpl', '12.0 kmpl',
'11.76 kmpl', '9.7 kmpl', '16.34 kmpl', '8.62 kmpl', '8.26 kmpl',
'16.1 kmpl', '16.9 kmpl', '15.0 kmpl', '12.74 kmpl', '12.65 kmpl',
'8.13 kmpl', '14.025 kmpl', '16.65 kmpl', '17.33 kmpl',
'15.81 kmpl', '16.72 kmpl', '14.34 kmpl', '13.7 kmpl', '9.5 kmpl',
'12.25 kmpl', '14.7 km/kg', '14.8 kmpl', '10.5 kmpl', '11.56 kmpl',
'18.75 kmpl', '14.23 kmpl', '10.75 kmpl', '9.17 kmpl',
'16.12 kmpl', '19.03 kmpl', '19.17 kmpl', '22.02 kmpl',
'22.25 kmpl', '18.24 kmpl', '22.0 kmpl', '18.72 kmpl',
'12.78 kmpl', '18.07 kmpl', '15.78 kmpl', '17.95 kmpl',
'19.47 kmpl', '17.88 kmpl', '18.41 kmpl', '15.1 kmpl', '17.2 kmpl',
'14.6 kmpl', '17.57 kmpl', '20.09 kmpl', '18.82 kmpl',
'18.97 kmpl', '18.53 kmpl', '14.08 kmpl', '16.35 kmpl',
'16.14 kmpl', '22.07 kmpl', '21.19 kmpl', '23.03 kmpl',
'26.49 km/kg', '18.13 kmpl', '19.27 kmpl', '8.0 kmpl',
'27.97 kmpl', '10.0 kmpl', '20.58 kmpl', '17.23 kmpl',
'18.47 kmpl', '18.67 kmpl', '17.69 kmpl', '36.0 kmpl', '11.2 kmpl'],
dtype=object)
In [23]:
main["ARAI Mileage"]=pd.Series(main["ARAI Mileage"]).apply(ARAI_Mileage_preprocess)
In [24]:
main.rename(columns={"ARAI Mileage":"ARAI Mileage(Km/L)"},inplace=True)
In [25]:
main["ARAI Mileage(Km/L)"]
Out[25]:
0 23.26
1 16.11
2 18.55
3 24.12
4 21.40
...
545 NaN
546 NaN
547 NaN
548 NaN
549 NaN
Name: ARAI Mileage(Km/L), Length: 160031, dtype: float64
In [26]:
main["Engine Displacement (cc)"]
Out[26]:
0 1197.0
1 1196.0
2 1462.0
3 1197.0
4 998.0
...
545 NaN
546 1969.0
547 1969.0
548 1969.0
549 1969.0
Name: Engine Displacement (cc), Length: 160031, dtype: float64
In [27]:
main["Max Power (bhp@rpm)"]
Out[27]:
0 88.50bhp@6000rpm
1 72.41bhp@6000rpm
2 103.25bhp@6000rpm
3 88.50bhp@6000rpm
4 65.71bhp@5500rpm
...
545 402.30bhp
546 NaN
547 246.58Bhp
548 NaN
549 NaN
Name: Max Power (bhp@rpm), Length: 160031, dtype: object
In [127]:
main["Max Power (bhp@rpm)"].unique()
Out[127]:
array(['88.50bhp@6000rpm', '72.41bhp@6000rpm', '103.25bhp@6000rpm',
'65.71bhp@5500rpm', '81.80bhp@6000rpm', '40.36bhp@6000rpm',
'101.65bhp@6000rpm', '70.40bhp@6000rpm', '86.63bhp@5500rpm',
'55.92bhp@5300rpm', nan, '47.33bhp@6000rpm', '76.43bhp@6000rpm',
'61.68bhp@6000rpm', '98.96bhp@3750rpm', '70bhp@6250rpm',
'68 @ 4,000 (PS@rpm)', '92@5500(PS@rpm)', '94.93bhp@6500rpm',
'68bhp@4000rpm', '101 @ 6,500 (PS@rpm)', '158.2bhp@3200rpm',
'70 @ 5,500 (PS@rpm)', '99.23bhp@3750rpm', '68.05bhp@4000rpm',
'90ps @ 3750rpm', '99bhp@3750rpm', '121bhp@6500rpm',
'70.02bhp@6250rpm', '120.69bhp@6500rpm', '94.68bhp@6500rpm',
'395bhp@6500+-50rpm', '167.62bhp@3500rpm', '141bhp@3500rpm',
'153.86bhp@3200rpm', '86.8bhp@6300rpm', '98.63bhp@3750rpm',
'109PS @ 6450rpm', '89.75bhp@3750rpm', '157.7bhp@3200rpm',
'88.7bhp@3750rpm', '107.5bhp@6045rpm', '98.59bhp@3750rpm',
'158bhp@3200rpm', '94.89bhp@6500rpm', '197bhp@3000rpm',
'110.4bhp@6300rpm', '110.5bhp@6300rpm', '123.24bhp@6000rpm',
'95.48bhp@6500rpm', '153.8bhp@3200rpm', '143 @ 3,500 (PS@rpm)',
'197.2bhp@3000rpm', '335.25bhp@5200-6400rpm', '636.98bhp',
'187.74bhp@4200-6000rpm', '187.74bhp@1500-4100rpm',
'241.3bhp@5000-6500rpm', '443.87bhp@5700-6700rpm',
'187.74bhp@4200-6000', '230', '335.25bhp@5000-6400rpm', '300',
'245.59bhp@5000-6000rpm', '591.39bhp@6000rpm', '522.99kw',
'591bhp@6000-6250rpm', '348.66bhp@5400-6400rpm',
'340bhp@5000-6400rpm', '500bhp@6000rpm', '542bhp@6000rpm',
'562bhp@6000rpm', '626bhp@5000-6000rpm', '335.26bhp@5500-6500rpm',
'410bhp@6250rpm', '194bhp@4500-6500rpm', '187.74bhp@5000-6000rpm',
'257.47bhp@5000-6000rpm', '254.79bhp@5200rpm', '189.08bhp@5000rpm',
'187.74bhp@4000rpm', '254.79bhp@5000rpm', '261.50bhp@4000rpm',
'382.19bhp@5800rpm', '394.26bhp@4400rpm', '321.84Bhp',
'248.08bhp@5200rpm', '335bhp@5000-6500rpm', '616.87bhp@6000rpm',
'335.25bhp@5500-6500rpm', '600bhp@6000rpm', '261.49bhp@4000rpm',
'281.6bhp@5000-6000rpm', '335.25bhp', '335.25bhp@5000-6500rpm',
'473.38bhp@6250rpm', '502.88bhp@6250rpm', '261.4bhp@4000rpm',
'187.74bhp@4200rpm', '261.49bhp', '189.08bhp@5000-6000rpm',
'1479bhp@6700rpm', '1001bhp@6000rpm', '89.84bhp@3200rpm',
'591.79bhp@7500rpm', '710.74bhp@8000rpm', '611.50bhp@5750-7500rpm',
'788.52@8500rpm', '769.31@7500rpm', '119.35bhp@6600rpm',
'97.89bhp@3600rpm', '96.55bhp@5600-6400rpm', '117.60bhp@6600rpm',
'79.12bhp@3600rpm', '113.18bhp@6300rpm', '81.86bhp@6000rpm',
'68.05bhp@5500rpm', '118.41bhp@6000rpm', '98.63bhp@4000rpm',
'113.45bhp@4000rpm', '153.81bhp@6200rpm', '156.82bhp@6500rpm',
'68.05bhp@6000rpm', '118.36bhp@6000rpm', '113.42bhp@4000rpm',
'98.56bhp@4000rpm', '98.63bhp@6000rpm', '86.80bhp@6000rpm',
'138.12bhp@6000rpm', '183.72bhp@4000rpm', '59.17bhp@5500rpm',
'160.92bhp@3600rpm', '77.77bhp@3800rpm', '160.77bhp@5500rpm',
'167.67bhp@3750rpm', '268bhp@5250rpm', '113.42bhp@6300rpm',
'113.43bhp@6300rpm', '138.08bhp@6000rpm', '113.43bhp@4000rpm',
'138.05bhp@6000rpm', '197.26bhp@3800rpm', '320.55bhp', '225.86bhp',
'602.11bhp@8000rpm', '630.28bhp@8000rpm', '610bhp',
'630.3bhp@8000rpm', '759.01bhp@8500rpm', '641bhp@6000rpm',
'641.00bhp@6000rpm', '640bhp@8000rpm', '187.74bhp@6000rpm',
'295.02@6600rpm', '354bhp@6600rpm', '258.81bhp@6000rpm',
'214.56bhp@5700rpm', '362bhp@5600rpm', '108.6bhp@5000rpm',
'178.49bhp@3800rpm', '115bhp@3750rpm', '74.96bhp@3600rpm',
'197.13bhp@5000rpm', '82bhp@5500rpm', '150bhp@5000rpm',
'100bhp@3750rpm', '75.09bhp@3200rpm', '108.62bhp@5000rpm',
'182.38bhp@3500rpm', '41.57bhp@3500rpm', '130bhp@3750rpm',
'130.07bhp@3750rpm', '120.96bhp@3500rpm', '152.87bhp@3750rpm',
'275bhp@4000rpm', '275bhp', '350bhp@5750rpm', '430bhp@5750rpm',
'530bhp@6800rpm', '450bhp@7000rpm', '325.8bhp@3600-4200rpm',
'281.61bhp@3400-4600rpm', '415.71bhp@6750rpm', '402.30Bhp',
'469.35bhp@5500-6250rpm', '261.49bhp@4200rpm',
'603.46bhp@5250-5500', '241.38bhp@4200rpm',
'603.46bhp@5750-6500rpm', '191.76bhp@3800rpm', '301.73bhp@5800rpm',
'429.12bhp@6100rpm', '160.92bhp@5500rpm', '201.15bhp@5800-6800rpm',
'194.44bhp@5500-6100rpm', '281.61bhp@3400-4600bhp',
'435bhp@5500-6100rpm', '576.63bhp@6000rpm',
'325.86bhp3600-4200rpm', '161bhp@3800rpm',
'254.79bhp@5800-6100rpm', '241.3bhp@4200rpm',
'325.8bhp@3600-4000rpm', '362.07bhp5500-6100bhp',
'362.07bhp@5500-6100rpm', '384.87@6100rpm', '197bhp@5500-6100rpm',
'576.63bhp@6250rpm', '384.87bhp@5500–6100rpm', '187.74bhp@3800rpm',
'639bhp', '496.17bhp@5500-5500', '194bhp@3800rpm',
'160.92bhp@4200rpm', '197.13bhp@3600rpm', '549.81bhp6000-6500rpm',
'750.97bhp', '147.51bhp@1620-4000rpm', '167.68bhp@3750rpm',
'141bhp@5000rpm', '158.79bhp@4000rpm', '212.55bhp@4000rpm',
'138.08bhp@5600rpm', '108.49bhp@6000rpm', '167.67Bhp@3750rpm',
'173.83bhp', '189.08bhp@4700-6000pm', '227.97', '181.03bhp',
'85.8bhp@5500rpm', '170PS @ 6000rpm', '118.6@4000 (PS@rpm)',
'164.5 @ 3,500 (PS@rpm)', '115 @ 5,250 (PS@rpm)',
'68@4,500 (PS@rpm)', '107.2bhp@4000rpm', '290bhp@6500rpm',
'199.3bhp@3800rpm', '114bhp@5250rpm', '175.56bhp@4000rpm',
'164.94bhp@6000rpm', '191.3bhp@3800rpm', '178bhp@4000rpm',
'98.63bhp@5000rpm', '71.02bhp@6250rpm', '153.87bhp@5500rpm',
'562.20bhp@6800rpm', '104.55bhp@5600rpm', '325.48bhp@5400–6400rpm',
'379.50bhp@6500', '502.88bhp@8400rpm', '482.76bhp',
'443.87bhp@6500', '542.4bhp@5750-6000rpm', '616.87bhp',
'295bhp@6500rpm', '680bhp@5750-6000rpm', '321.84bhp',
'340bhp@5300-6400rpm', '414.37bhp@7600rpm',
'434.49bhp@5700-6600rpm', '493.49bhp@8400rpm', '394.26bhp@7000rpm',
'641.00bhp@6500', '335bhp@5300-6400rpm', '631.62bhp@6000rpm',
'261.49bhp@5000-6500rpm', '375.48bhp@5200-6700rpm',
'453.26bhp@6000–6500rpm', '541.773bhp@5750-6000rpm',
'550bhp@5750-6000rpm', '450hp@6500', '71.01bhp@6250rpm',
'53.26bhp@5600rpm', '67.06bhp@5500rpm', '563bhp@5000rpm',
'563bhp@5250rpm', '563bhp@5250-6000rpm', '147.52bhp@5000-6000rpm',
'113.98bhp@5000-5500rpm', '147.51bhp@5000-6000rpm',
'187.74bhp@4180-6000rpm', '118.36bhp@5500rpm', '84.82bhp@6000rpm',
'84.48bhp@6000rpm', '84.88bhp@6000rpm', '141.04bhp', '73.75bhp',
'85bhp@3000rpm', '108.49bhp@4000rpm', '88.77bhp@4000rpm',
'72.40bhp@6000rpm', '108.50bhp@5500rpm', '72bhp@6000rpm', '127bhp',
'85.82bhp@3000rpm', '201.15bhp@3400rpm', '201.15bhp@3000-3400rpm',
'163.60bhp@5200rpm', '91.18bhp@5500rpm', '101.64bhp@6000rpm',
'175.67bhp@5700rpm', '115.32bhp@4700rpm', '108.62bhp@5000-5500rpm',
'400bhp', '246.58Bhp', '190bhp', '246.74bhp@4000rpm', '402.30bhp'],
dtype=object)
In [128]:
def bhp_rpm_preprocess(text):
try:
global final_df,k
bhp=[]
rpm=[]
text=str(text)
split_value=text.split("@",1)
split_value.reverse()
if len(split_value)==1:
check="".join(re.findall('[a-zA-Z]',split_value[0])).lower()
if check=="bhprpm":
_replace=split_value[0].replace("bhp","bhp@")
lists=_replace.split("@")
split_value=[]
split_value.append(lists[1])
split_value.append(lists[0])
elif check=="bhpbhp":
_replace=split_value[0].replace("bhp","bhp@")
lists=_replace.split("@")
split_value=[]
split_value.append(lists[1])
split_value.append(lists[0])
if len(split_value)==2:
first="".join(re.findall('[a-zA-Z]',split_value[0]))
second="".join(re.findall('[a-zA-Z]',split_value[1]))
if first==second:
split_value[0]=split_value[0].replace(first,"rpm")
if first=="pm":
split_value[0]=split_value[0].replace(first,"rpm")
if second=="hp":
split_value[1]=split_value[1].replace(second,"bhp")
j=0
Activate=False
for i in split_value:
if len(split_value)==2:
check="".join(re.findall('[a-zA-Z]',i)).lower()
if check=="psrpm":
try:
rpm.append((float(i.replace("".join(re.findall("[\sa-zA-Z\@\(\)]",i)).strip(),""))))
Activate=True
except:
first=i.replace("".join(re.findall("[\sa-zA-Z\@\(\)]",i)).strip(),"")
rpm.append(float(first.replace("".join(re.findall('\,',first)).strip(),"")))
Activate=True
else:
if Activate==True:
convert_into_bhp=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))/1.01387)
bhp.append(convert_into_bhp)
Activate=False
else:
if check=="":
if j==1:
bhp.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
elif j==0:
try:
rpm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
except:
intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
lists=intial.split("-")
average_of_rpm=(float(lists[0])+float(lists[1]))/2
rpm.append(average_of_rpm)
else:
if check=="kw":
convert_into_bhp=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))/0.745699872)
bhp.append(convert_into_bhp)
elif check=="bhp":
_bhp=float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))
bhp.append(_bhp)
elif check=="ps":
convert_into_bhp=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))/1.014)
bhp.append(convert_into_bhp)
elif check=="rpm":
try:
_rpm=float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))
rpm.append(_rpm)
except:
try:
intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
try:
lists=intial.split("-")
average_of_rpm=(float(lists[0])+float(lists[1]))/2
rpm.append(average_of_rpm)
except:
lists=intial.split("–")
average_of_rpm=(float(lists[0])+float(lists[1]))/2
rpm.append(average_of_rpm)
except:
intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
lists=intial.split("+-")
_first=float(lists[0])#take the average value
rpm.append(_first)
else:
check="".join(re.findall('[a-zA-Z]',i)).lower()
if check=="kw":
convert_into_bhp=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))/0.745699872)
bhp.append(convert_into_bhp)
rpm.append(0)
elif check=="bhp":
bhp.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
rpm.append(0)
elif check=="rpm":
rpm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
bhp.append(0)
elif check=="nan":
bhp.append(np.nan)
rpm.append(np.nan)
else:
bhp.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
rpm.append(0)
j=j+1
temp=pd.DataFrame({
"BHP":bhp,
"RPM":rpm
})
final_df=pd.concat([final_df,temp],axis=0)
print(k)
k=k+1
except:
print("UnSuccessfully executed",k)
k=k+1
In [ ]:
#This are the possible cases of the Max Power (bhp@rpm) column....
# 522.99kw
# 300 #Rpm
# 187.74bhp@4200-6000rpm
# 636.98bhp
# 153.86bhp@3200rpm
# 68 @ 4,000 (PS@rpm)
# 92@5500(PS@rpm)
# 90ps @ 3750rpm
# 395bhp@6500+-50rpm
# 34hp@2000
# 90bhp@1000pm
# 125bhp2000rpm
In [62]:
k=0
final_df=pd.DataFrame()
pd.Series(main["Max Power (bhp@rpm)"]).apply(bhp_rpm_preprocess)
main.shape
Out[62]:
(160031, 155)
In [63]:
final_df.shape
Out[63]:
(160031, 3)
In [29]:
final_df=final_df.reset_index().drop(["index"],axis=1)
In [30]:
main=main.reset_index(drop=True)
In [31]:
main=pd.concat([main,final_df],axis=1)
In [32]:
main[["Max Power (bhp@rpm)","BHP","RPM"]]
Out[32]:
| Max Power (bhp@rpm) | BHP | RPM | |
|---|---|---|---|
| 0 | 88.50bhp@6000rpm | 88.50 | 6000.0 |
| 1 | 72.41bhp@6000rpm | 72.41 | 6000.0 |
| 2 | 103.25bhp@6000rpm | 103.25 | 6000.0 |
| 3 | 88.50bhp@6000rpm | 88.50 | 6000.0 |
| 4 | 65.71bhp@5500rpm | 65.71 | 5500.0 |
| ... | ... | ... | ... |
| 160026 | 402.30bhp | 402.30 | 0.0 |
| 160027 | NaN | NaN | NaN |
| 160028 | 246.58Bhp | 246.58 | 0.0 |
| 160029 | NaN | NaN | NaN |
| 160030 | NaN | NaN | NaN |
160031 rows × 3 columns
In [33]:
main.iloc[155023][["Max Power (bhp@rpm)","RPM","BHP"]]
Out[33]:
Max Power (bhp@rpm) 91.18bhp@5500rpm RPM 5500.0 BHP 91.18 Name: 155023, dtype: object
In [34]:
final_df.iloc[155023]
Out[34]:
Unnamed: 0 155023.00 BHP 91.18 RPM 5500.00 Name: 155023, dtype: float64
In [35]:
main.drop(["Max Power (bhp@rpm)"],axis=1,inplace=True)
In [36]:
main["Seating Capacity"]
Out[36]:
0 5.0
1 5.0
2 5.0
3 5.0
4 5.0
...
160026 5.0
160027 NaN
160028 5.0
160029 NaN
160030 NaN
Name: Seating Capacity, Length: 160031, dtype: float64
In [37]:
main["Boot Space (Litres)"]
Out[37]:
0 378.0
1 NaN
2 375.0
3 378.0
4 NaN
...
160026 414
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: Boot Space (Litres), Length: 160031, dtype: object
In [38]:
main["Body Type"]
Out[38]:
0 Sedan
1 Minivan
2 SUV
3 Sedan
4 Hatchback
...
160026 SUV
160027 SUV
160028 Sedan
160029 SUV
160030 SUV
Name: Body Type, Length: 160031, dtype: object
In [39]:
main["Body Type"].unique()
Out[39]:
array(['Sedan', 'Minivan', 'SUV', 'Hatchback', 'MUV', 'Pickup Truck',
'Coupe', 'Luxury', 'Convertible', nan, 'Wagon', 'Hybrid'],
dtype=object)
In [40]:
plt.figure(figsize=(15,10))
sns.countplot(main["Body Type"])
Out[40]:
<AxesSubplot:xlabel='Body Type', ylabel='count'>
In [41]:
main.columns
Out[41]:
Index(['Model', 'Brand', 'Varient', 'ARAI Mileage(Km/L)',
'Engine Displacement (cc)', 'Seating Capacity', 'Boot Space (Litres)',
'Body Type', 'Fuel Type', 'No. of cylinder',
...
'Ventilated Seats', 'Tyre Pressure Monitor', 'Rain Sensing Wiper',
'Turbo Charger', 'Air Quality Control', 'Traction Control',
'Vehicle Stability Control System', 'Unnamed: 0', 'BHP', 'RPM'],
dtype='object', length=153)
In [42]:
main["Fuel Type"]
Out[42]:
0 Petrol
1 Petrol
2 Petrol
3 Petrol
4 Petrol
...
160026 Electric
160027 Petrol
160028 Petrol
160029 Petrol
160030 Petrol
Name: Fuel Type, Length: 160031, dtype: object
In [43]:
main["Fuel Type"].unique()
Out[43]:
array(['Petrol', 'CNG', 'Diesel', 'Electric'], dtype=object)
In [44]:
plt.figure(figsize=(15,5))
sns.countplot(main["Fuel Type"])
Out[44]:
<AxesSubplot:xlabel='Fuel Type', ylabel='count'>
In [45]:
main["No. of cylinder"]
Out[45]:
0 4.0
1 4.0
2 4.0
3 4.0
4 3.0
...
160026 NaN
160027 4.0
160028 4.0
160029 4.0
160030 4.0
Name: No. of cylinder, Length: 160031, dtype: float64
In [46]:
main['Max Torque (nm@rpm)']
Out[46]:
0 113Nm@4400rpm
1 98Nm@3000rpm
2 138Nm@4400rpm
3 113Nm@4400rpm
4 89Nm@3500rpm
...
160026 660Nm
160027 NaN
160028 350Nm
160029 NaN
160030 NaN
Name: Max Torque (nm@rpm), Length: 160031, dtype: object
In [102]:
main["Max Torque (nm@rpm)"].unique()
Out[102]:
array(['113Nm@4400rpm', '98Nm@3000rpm', '138Nm@4400rpm', '89Nm@3500rpm',
'89nm@3500rpm', '113Nm@4200rpm', '60Nm@3500rpm', '136.8nm@4400rpm',
'95nm@4000rpm', '98Nm @3000rpm', '121.5nm@4200rpm',
'113nm@4200rpm', '82.1Nm@3400rpm', nan, '82.1nm@3400rpm',
'69Nm@3500rpm', '98.5Nm@4300rpm', '85Nm@3000rpm', '98nm@3000rpm',
'69nm@3500rpm', '205Nm@1750-3250rpm', '102Nm@4000rpm',
'16 @ 2,000 (kgm@rpm)', '130@2500(kgm@rpm)', '215Nm@1750-2500rpm',
'119Nm@4250rpm', '160Nm@2000rpm', '14.9 @ 3,400 (kgm@rpm)',
'385Nm@1600-2500rpm', '10.7 @ 2,500 (kgm@rpm)',
'215Nm@1750-3000rpm', '204Nm @ 2000-2750rpm',
'16.3 @ 2,000 (kgm@rpm)', '150Nm@4500rpm',
'14.75 @ 3,400 (kgm@rpm)', '120Nm@4250rpm', '149Nm@4500rpm',
'515Nm@4250+-50rpm', '420Nm@2000-2500rpm', '215nm@1750-2500rpm',
'330Nm@1800rpm', '119nm@4250rpm', '215Nm@1750-3000',
'380Nm@2500rpm', '112Nm@4000rpm', '140Nm @ 4500rpm',
'204Nm@2000-2750rpm', '140Nm@4500rpm', '14.8 @ 3,400 (kgm@rpm)',
'470Nm@1750-2500rpm', '136Nm@4250rpm', '170Nm@1500-4500rpm',
'33.7 @ 1,800 (kgm@rpm)', '500Nm@1370-4500rpm', '830Nm',
'320nm@1500–4180rpm', '370Nm@1600-4500rpm', '600nm@1900-5000rpm',
'320nm@1450–4200', '540', '500nm@1370-4500rpm', '664',
'370nm@1600-4300bhp', '800nm@2200-4500rpm', '630Nm',
'800nm@2050-4500rpm', '660Nm@1700rpm', '770 Nm@2000-4500rpm',
'700Nm@1600rpm', '900nm@1350-4500rpm', '900Nm@1350-4500rpm',
'450Nm@1500-5200rpm', '550Nm@2350-5230rpm', '320Nm@1450-4200rpm',
'400nm@1750-2500rpm', '280nm@1350-4600rpm', '400Nm@1550-4400rpm',
'400Nm@1750-2500rpm', '620Nm@1500-2500rpm', '500Nm@1850-5000rpm',
'760nm@2000-3000rpm', '350nm@1450-4800rpm', '500Nm@1600-4500rpm',
'750nm@1800-5860rpm', '450Nm@1500-2000rpm', '750nm@1800-5600rpm',
'620nm@2000–2500rpm', '450Nm@1380-5000rpm', '430Nm',
'500nm@1600-4500rpm', '450Nm@1500–5200rpm', '600nm@2600-5600rpm',
'400nm@1750–2500rpm', '650Nm@2750-5500rpm', '620Nm@2000-2500rpm',
'620Nm@1500–2500rpm', '620nm@2000–2500', '1600Nm@2000-6000rpm',
'1250Nm@2200-5500rpm', '250Nm@1400-2400rpm', '760Nm@3000-5250rpm',
'770nm@3250rpm', '760Nm@3000-5750rpm', '718Nm@7000rpm',
'800Nm@6000rpm', '110Nm@4800rpm', '145Nm@4300rpm', '200Nm@1750rpm',
'127nm@4500-5000', '145Nm@4600rpm', '160Nm@1750rpm',
'143.8nm@4500rpm', '113.8nm@4000rpm', '99Nm@4500 rpm',
'172Nm@1500-4000rpm', '240.26nm@1500-2750rpm',
'250nm@1500-2750rpm', '192nm@4500rpm', '114.74nm@4200rpm',
'191nm@4500rpm', '95.2nm@4000rpm', '171.62nm@1500-4000rpm',
'143.8Nm@4500rpm', '240Nm@1500-2750rpm', '172nm@1500-4000rpm',
'113.8Nm@4000rpm', '242nm@1500-3200rpm', '250Nm@1500-2750rpm',
'416nm@2000-2750rpm', '85.3Nm@4500 rpm', '360Nm@2000-2500rpm',
'360nm@2000-2500rpm', '176Nm@1500-2400 rpm', '250nm@2500-4000rpm',
'350Nm@1750-2500rpm', '400nm@3000rpm', '350nm@1750-2500rpm',
'144nm@4500rpm', '240nm@1500-2750rpm', '115nm@4200rpm',
'440nm@1750-2750rpm', '605nm', '350nm', '560Nm@6500rpm',
'565Nm@6500rpm', '560Nm', '600Nm@6500rpm', '565Nm',
'720Nm@6750rpm', '850nm@2250-4500rpm', '239Nm@4300-4500rpm',
'350Nm@5100rpm', '350nm@5100rpm', '335Nm@4600rpm',
'202Nm@3600-5200rpm', '530Nm@3200rpm', '200Nm@2000-3500rpm',
'420Nm@1600-2600rpm', '300Nm@1500-2500rpm', '210nm@1600-2200rpm',
'210Nm@1600-2200rpm', '380nm@1750-3000rpm', '115Nm@3500-3600rpm',
'300nm@1250-3000rpm', '260nm@1750-2250rpm', '320nm@1500-3000rpm',
'200nm@1400-2200rpm', '420Nm@1600-2800rpm', '91Nm@3000rpm',
'300nm@1600-2800rpm', '300Nm@1600-2800rpm', '300Nm@1750-2500rpm',
'360nm@1500-2800rpm', '450Nm@1750-2800rpm', '600Nm@2000-2600rpm',
'600Nm@2000-4000rpm', '580Nm@1750-4500rpm', '580nm',
'580Nm@2000-5750rpm', '450nm', '500Nm@1750-4750rpm',
'580Nm@2250-4000rpm', '650Nm@2000-4000rpm', '510Nm@4750rpm',
'700Nm@1600-4000rpm', '600nm@1200-3200rpm', '500nm@5000-5250rpm',
'760Nm', '650nm@1750-4500rpm', '550nm@1800-2200rpm',
'900nm@2000-4000rpm', '500nm@1600-2400rpm', '850nm@2500-4500rpm',
'850nm@2500-5000rpm', '400Nm@1600-2800rpm', '400Nm@3000-4000rpm',
'520nm@1800-5800rpm', '250nm@1620-4000rpm', '400nm@3000-4000rpm',
'300nm@1800-4000rpm', '320Nm@1650-4000rpm', '850nm@2500–3500rpm',
'700nm@1200-3200rpm', '380Nm@1400-2400rpm', '370Nm@1800-4000rpm',
'500Nm@1600-2400rpm', '700Nm@1200-3000rpm', '500Nm@1600-4000rpm',
'520nm@2500-5000rpm', '320nm@1650-4000rpm', '700Nm@2100–5500rpm',
'600Nm@1200-3200rpm', '400Nm@1600-2600rpm', '900nm@2500-4500rpm',
'700nm@2000-4500rpm', '400nm@2800rpm', '380Nm@1200-4000rpm',
'440nm@1800-2800rpm', '250nm@1620–4000rpm', '730nm@2500-4500rpm',
'1020Nm', '320Nm@1400-3500rpm', '250nm@1600-3600rpm',
'250Nm@1600-3600rpm', '373.5Nm@1500-2400rpm',
'478.5Nm@1500-2400rpm', '220nm@3600rpm', '144nm@4400rpm', '280nm',
'280Nm@1250rpm', '320Nm', '270Nm@1000rpm', '280Nm@1350rpm',
'132.3Nm@3300rpm', '226Nm @ 4100rpm', '29.8@2000 (kgm@rpm)',
'39.1 @ 2,000 (kgm@rpm)', '17.8 @ 4,250 (kgm@rpm)',
'12.5@3,000 (kgm@rpm)', '275Nm@2000rpm', '366Nm@3500rpm',
'441Nm@2000rpm', '175Nm@4250rpm', '350Nm@1800-3500rpm',
'222Nm@4100rpm', '400Nm@2000-2500rpm', '160nm@2800-3600rpm',
'152nm@2200-4400rpm', '96nm@3500rpm', '254nm@1600rpm',
'637Nm@3300-5800rpm', '142Nm@4000rpm', '448.77nm@1960-4500rpm',
'450Nm1950–5000', '470Nm@6100rpm', '650Nm', '530Nm@2300-5000',
'770nm@2000-4500rpm', '500Nm', '380Nm@1950-4500rpm',
'770nm@1960-4500rpm', '345Nm', '450Nm@1340-5300rpm',
'420Nm@8000rpm', '550Nm@1900-5600rpm', '450Nm@6750rpm',
'420Nm@5000–6500/5500rpm', '420Nm', '450nm@1340-5300rpm',
'850Nm@2300to4500rpm', '400Nm@1800-4500rpm', '520Nm@1850-5000rpm',
'620Nm@1800–4500rpm', '770Nm@1960-4500rpm', '96Nm@3500rpm',
'72Nm@4250rpm', '91Nm@4250rpm', '900Nm@1700rpm', '820Nm@1500rpm',
'840Nm@1650-4750rpm', '850Nm@1600rpm', '250nm@1600-3500rpm',
'320nm@1500-4100rpm', '178nm@1750-4500rpm', '320nm@1500-3990rpm',
'320nm@1450-4200rpm', '170nm@1750-4000rpm', '113nm@3300rpm',
'113Nm@3300+/-100rpm', '113Nm@3300rpm', '250Nm', '170nm',
'250Nm@1000-2000rpm', '260nm@1500-2750rpm', '200nm@1250-3000rpm',
'95nm@3500rpm', '140nm@1500-5500rpm', '245nm', '170Nm',
'420nm@1400-3400rpm', '500nm@1600-2800rpm', '245nm@4000rpm',
'113nm@4400rpm', '122Nm@4400-4800rpm', '500Nm@1600-2800rpm',
'136.8Nm@4400rpm', '221Nm@3600to5200rpm', '198nm@2800-4000rpm',
'175Nm@1750-4000rpm', '250Nm@1600-3500rpm', '320Nm@1500-4100rpm',
'640Nm@1740rpm', '350Nm', '300nm', '440Nm@1500-3000rpm', '660Nm'],
dtype=object)
In [149]:
def nm_rpm_preprocess(text):
try:
global nm_final_df,k,names
nm=[]
rpm=[]
text=str(text)
split_value=text.split("@",1)
split_value.reverse()
backup_split_value=split_value
if len(split_value)==1:
check="".join(re.findall('[a-zA-Z]',split_value[0])).lower()
if check=="nmrpm":
_replace=split_value[0].replace("nm","nm@")
lists=_replace.split("@")
split_value=[]
split_value.append(lists[1])
split_value.append(lists[0])
elif check=="nm":
try:
_replace=split_value[0].replace("Nm","nm@")
if "@" not in _replace:
raise Exception("own exception")
except:
_replace=split_value[0].replace("nm","nm@")
lists=_replace.split("@")
split_value=[]
split_value.append(lists[1])
split_value.append(lists[0])
if split_value[0]=="":
split_value=backup_split_value
if len(split_value)==2:
first="".join(re.findall('[a-zA-Z]',split_value[0])).lower()
if first=="torpm":
split_value[0]=split_value[0].replace("to","-")
if first=="bhp":
split_value[0]=split_value[0].replace(first,"rpm")
j=0
Activate=False
for i in split_value:
if len(split_value)==2:
check="".join(re.findall('[a-zA-Z]',i)).lower()
if check=="kgmrpm":
try:
rpm.append((float(i.replace("".join(re.findall("[\sa-zA-Z\@\(\)]",i)).strip(),""))))
Activate=True
except:
first=i.replace("".join(re.findall("[\sa-zA-Z\@\(\)]",i)).strip(),"")
rpm.append(float(first.replace("".join(re.findall('\,',first)).strip(),"")))
Activate=True
else:
if Activate==True:
convert_into_nm=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))*9.80665)
nm.append(convert_into_nm)
Activate=False
else:
if check=="":
if j==1:
nm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
elif j==0:
try:
rpm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
except:
try:
intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
lists=intial.split("-")
average_of_rpm=(float(lists[0])+float(lists[1]))/2
rpm.append(average_of_rpm)
except:
lists=intial.split("–")
average_of_rpm=(float(lists[0])+float(lists[1]))/2
rpm.append(average_of_rpm)
else:
if check=="nm":
_nm=float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))
nm.append(_nm)
elif check=="kgm":
convert_into_nm=(float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))*9.80665)
nm.append(convert_into_bhp)
elif check=="rpm":
try:
_rpm=float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))
rpm.append(_rpm)
except:
try:
intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
try:
lists=intial.split("-")
try:
average_of_rpm=(float(lists[0])+float(lists[1]))/2
except:
first=float(lists[0])
second=lists.aplit("/")
average_of_rpm=second[1]
rpm.append(average_of_rpm)
except:
lists=intial.split("–")
try:
average_of_rpm=(float(lists[0])+float(lists[1]))/2
except:
second=lists[1].split("/")
average_of_rpm=float(second[1])
rpm.append(average_of_rpm)
except:
try:
intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
lists=intial.split("+-")
_first=float(lists[0])#take the average value
rpm.append(_first)
except:
intial=i.replace("".join(re.findall("[a-zA-Z]",i)),"")
lists=intial.split("+/-")
_first=float(lists[0])#take the average value
rpm.append(_first)
else:
check="".join(re.findall('[a-zA-Z]',i)).lower()
if check=="nm":
nm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
rpm.append(0)
elif check=="rpm":
rpm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
nm.append(0)
elif check=="nan":
nm.append(np.nan)
rpm.append(np.nan)
else:
nm.append((float(i.replace("".join(re.findall("[a-zA-Z]",i)),""))))
rpm.append(0)
j=j+1
temp=pd.DataFrame({
"NM":nm,
"NM_RPM":rpm
})
nm_final_df=pd.concat([nm_final_df,temp],axis=0)
print(k)
k=k+1
except:
print("UnSuccessfully executed",k)
names.append(main["Max Torque (nm@rpm)"].iloc[k])
k=k+1
In [223]:
# 113Nm@4400rpm
# 205Nm@1750-3250rpm
# 16 @ 2,000 (kgm@rpm)
# 515Nm@4250+-50rpm
# 830Nm
# 540
# 420Nm@5000–6500/5500rpm
# 450Nm1950–5000
# 850Nm@2300to4500rpm
# 113Nm@3300+/-100rpm
# 370nm@1600-4300bhp
names=[]
k=0
nm_final_df=pd.DataFrame()
pd.Series(main["Max Torque (nm@rpm)"]).apply(nm_rpm_preprocess)
In [ ]:
nm_final_df.shape
In [ ]:
main.shape
In [150]:
main["Max Torque (nm@rpm)"].iloc[124596]
Out[150]:
'448.77nm@1960-4500rpm'
In [151]:
nm_final_df[["NM","NM_RPM"]].iloc[124596]
Out[151]:
NM 448.77 NM_RPM 3230.00 Name: 124596, dtype: float64
In [60]:
nm_final_df=nm_final_df.reset_index(drop=True)
In [61]:
main=main.reset_index(drop=True)
In [62]:
main=pd.concat([main,nm_final_df],axis=1)
In [63]:
main.drop(["Max Torque (nm@rpm)"],axis=1,inplace=True)
In [64]:
main.drop(["Unnamed: 0"],axis=1,inplace=True)
In [65]:
main.columns
Out[65]:
Index(['Model', 'Brand', 'Varient', 'ARAI Mileage(Km/L)',
'Engine Displacement (cc)', 'Seating Capacity', 'Boot Space (Litres)',
'Body Type', 'Fuel Type', 'No. of cylinder',
...
'Tyre Pressure Monitor', 'Rain Sensing Wiper', 'Turbo Charger',
'Air Quality Control', 'Traction Control',
'Vehicle Stability Control System', 'BHP', 'RPM', 'NM', 'NM_RPM'],
dtype='object', length=153)
In [66]:
main["TransmissionType"]
Out[66]:
0 Manual
1 Manual
2 Manual
3 Automatic
4 Manual
...
160026 Automatic
160027 Automatic
160028 Automatic
160029 Automatic
160030 Automatic
Name: TransmissionType, Length: 160031, dtype: object
In [67]:
main["TransmissionType"].unique()
Out[67]:
array(['Manual', 'Automatic'], dtype=object)
In [68]:
plt.figure(figsize=(15,5))
sns.countplot(main["TransmissionType"])
Out[68]:
<AxesSubplot:xlabel='TransmissionType', ylabel='count'>
In [69]:
main["Fuel Tank Capacity"]
Out[69]:
0 37.0
1 40.0
2 48.0
3 37.0
4 27.0
...
160026 NaN
160027 NaN
160028 60.0
160029 NaN
160030 NaN
Name: Fuel Tank Capacity, Length: 160031, dtype: float64
In [70]:
main["Multi-function Steering Wheel"]
Out[70]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Multi-function Steering Wheel, Length: 160031, dtype: object
In [71]:
main.columns
Out[71]:
Index(['Model', 'Brand', 'Varient', 'ARAI Mileage(Km/L)',
'Engine Displacement (cc)', 'Seating Capacity', 'Boot Space (Litres)',
'Body Type', 'Fuel Type', 'No. of cylinder',
...
'Tyre Pressure Monitor', 'Rain Sensing Wiper', 'Turbo Charger',
'Air Quality Control', 'Traction Control',
'Vehicle Stability Control System', 'BHP', 'RPM', 'NM', 'NM_RPM'],
dtype='object', length=153)
In [72]:
#we handle the nan values after done this
main["Multi-function Steering Wheel"].unique()
Out[72]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [73]:
plt.figure(figsize=(15,5))
sns.countplot(main["Multi-function Steering Wheel"])
Out[73]:
<AxesSubplot:xlabel='Multi-function Steering Wheel', ylabel='count'>
In [74]:
main["Touch Screen"]
Out[74]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Touch Screen, Length: 160031, dtype: object
In [75]:
main["Touch Screen"].unique()
Out[75]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [76]:
plt.figure(figsize=(15,5))
sns.countplot(main["Touch Screen"])
Out[76]:
<AxesSubplot:xlabel='Touch Screen', ylabel='count'>
In [77]:
main.columns[15]
Out[77]:
'Alloy Wheels'
In [78]:
main["Engine Start Stop Button"]
Out[78]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Engine Start Stop Button, Length: 160031, dtype: object
In [79]:
main["Engine Start Stop Button"].unique()
Out[79]:
array(['YES', nan, 'NO'], dtype=object)
In [80]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Start Stop Button"])
Out[80]:
<AxesSubplot:xlabel='Engine Start Stop Button', ylabel='count'>
In [81]:
main.columns[16]
Out[81]:
'Power Windows Rear'
In [82]:
main["Alloy Wheels"]
Out[82]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Alloy Wheels, Length: 160031, dtype: object
In [83]:
main["Alloy Wheels"].unique()
Out[83]:
array(['YES', nan, 'NO', 'OPTIONAL', '4'], dtype=object)
In [84]:
plt.figure(figsize=(15,5))
sns.countplot(main["Alloy Wheels"])
Out[84]:
<AxesSubplot:xlabel='Alloy Wheels', ylabel='count'>
In [85]:
main.loc[main["Alloy Wheels"]=="4"][["Model","Place","Brand","Varient","Alloy Wheels"]]
Out[85]:
| Model | Place | Brand | Varient | Alloy Wheels | |
|---|---|---|---|---|---|
| 48957 | BMW | On-Road Price in Pune : (Not Available in Sola... | X3 | X3 xDrive30i M Sport | 4 |
| 48966 | BMW | On-Road Price in Ranchi : | X3 | X3 xDrive30i SportX Plus | 4 |
| 48969 | BMW | On-Road Price in Kolkata : (Not Available in G... | X3 | X3 xDrive30i M Sport | 4 |
| 49007 | BMW | On-Road Price in Ranchi : | X3 | X3 xDrive30i M Sport | 4 |
| 49015 | BMW | On-Road Price in Faridabad : | X3 | X3 xDrive30i M Sport | 4 |
| ... | ... | ... | ... | ... | ... |
| 52739 | BMW | On-Road Price in New Delhi : | X3 | X3 xDrive30i SportX Plus | 4 |
| 52760 | BMW | On-Road Price in Kolkata : (Not Available in G... | X3 | X3 xDrive30i M Sport | 4 |
| 52783 | BMW | On-Road Price in Pune : | X3 | X3 xDrive30i SportX Plus | 4 |
| 52816 | BMW | On-Road Price in Vadodara : | X3 | X3 xDrive30i M Sport | 4 |
| 52845 | BMW | On-Road Price in Surat : | X3 | X3 xDrive30i SportX Plus | 4 |
187 rows × 5 columns
In [86]:
#The above problem occured to due to same tag class name
#The problem can be resolved by using the domain knowledge
#so i see the bmw x3 which has three varient all of them have alloy wheels
def make_correction(text):
if text=="4":
return text.replace("4","YES")
else:
return text
In [87]:
main["Alloy Wheels"]=pd.Series(main["Alloy Wheels"]).apply(make_correction)
In [88]:
plt.figure(figsize=(15,5))
sns.countplot(main["Alloy Wheels"])
Out[88]:
<AxesSubplot:xlabel='Alloy Wheels', ylabel='count'>
In [89]:
main["Alloy Wheels"].unique()
Out[89]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [90]:
main["Power Windows Rear"]
Out[90]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Power Windows Rear, Length: 160031, dtype: object
In [91]:
main["Power Windows Rear"].unique()
Out[91]:
array(['YES', nan, 'NO'], dtype=object)
In [92]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows Rear"])
Out[92]:
<AxesSubplot:xlabel='Power Windows Rear', ylabel='count'>
In [93]:
main["Wheel Covers"]
Out[93]:
0 NO
1 NaN
2 NaN
3 NO
4 NO
...
160026 NaN
160027 NaN
160028 NO
160029 NaN
160030 NaN
Name: Wheel Covers, Length: 160031, dtype: object
In [94]:
main["Wheel Covers"].unique()
Out[94]:
array(['NO', nan, 'YES'], dtype=object)
In [95]:
plt.figure(figsize=(15,5))
sns.countplot(main["Wheel Covers"])
Out[95]:
<AxesSubplot:xlabel='Wheel Covers', ylabel='count'>
In [96]:
main["Driver Airbag"]
Out[96]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Driver Airbag, Length: 160031, dtype: object
In [97]:
main["Driver Airbag"].unique()
Out[97]:
array(['YES', nan, 'NO'], dtype=object)
In [98]:
plt.figure(figsize=(15,5))
sns.countplot(main["Driver Airbag"])
Out[98]:
<AxesSubplot:xlabel='Driver Airbag', ylabel='count'>
In [99]:
main["Air Conditioner"]
Out[99]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Air Conditioner, Length: 160031, dtype: object
In [100]:
main["Air Conditioner"].unique()
Out[100]:
array(['YES', nan, 'NO', '5', '4', '2', '1890', '2923', '506mm', '348'],
dtype=object)
In [101]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Conditioner"])
Out[101]:
<AxesSubplot:xlabel='Air Conditioner', ylabel='count'>
In [3]:
#This problem occured due to same class name in tag during collecting the data
def preprocess_to_null_out(catagorie,changer):
catagorie=str(catagorie)
if catagorie=="nan":
return np.nan
if changer==False:
if catagorie!="YES" and catagorie!="NO":
return np.nan
else:
return catagorie
else:
if catagorie!="YES" and catagorie!="NO" and catagorie!="OPTIONAL":
return np.nan
else:
return catagorie
In [103]:
main["Air Conditioner"]=main["Air Conditioner"].apply(lambda x:preprocess_to_null_out(x,False))
In [104]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Conditioner"])
Out[104]:
<AxesSubplot:xlabel='Air Conditioner', ylabel='count'>
In [105]:
main["Power Adjustable Exterior Rear View Mirror"]
Out[105]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Power Adjustable Exterior Rear View Mirror, Length: 160031, dtype: object
In [106]:
main["Power Adjustable Exterior Rear View Mirror"].unique()
Out[106]:
array(['YES', nan, 'NO'], dtype=object)
In [107]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Adjustable Exterior Rear View Mirror"])
Out[107]:
<AxesSubplot:xlabel='Power Adjustable Exterior Rear View Mirror', ylabel='count'>
In [108]:
main["Automatic Climate Control"]
Out[108]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 OPTIONAL
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Automatic Climate Control, Length: 160031, dtype: object
In [109]:
main["Automatic Climate Control"].unique()
Out[109]:
array(['YES', nan, 'NO', 'OPTIONAL', '5', '4', '2 Zone', '3 Zone',
'4 Zone', '2923', '2765'], dtype=object)
In [110]:
plt.figure(figsize=(15,5))
sns.countplot(main["Automatic Climate Control"])
Out[110]:
<AxesSubplot:xlabel='Automatic Climate Control', ylabel='count'>
In [111]:
main["Automatic Climate Control"]=main["Automatic Climate Control"].apply(lambda x:preprocess_to_null_out(x,True))
In [112]:
plt.figure(figsize=(15,5))
sns.countplot(main["Automatic Climate Control"])
Out[112]:
<AxesSubplot:xlabel='Automatic Climate Control', ylabel='count'>
In [113]:
main["Automatic Climate Control"]=main["Automatic Climate Control"].apply(lambda x:preprocess_to_null_out(x,True))
In [114]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Conditioner"])
Out[114]:
<AxesSubplot:xlabel='Air Conditioner', ylabel='count'>
In [115]:
main.columns[23]
Out[115]:
'Fog Lights - Front'
In [116]:
main["Anti Lock Braking System"]
Out[116]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Anti Lock Braking System, Length: 160031, dtype: object
In [117]:
main["Anti Lock Braking System"].unique()
Out[117]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [118]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti Lock Braking System"])
Out[118]:
<AxesSubplot:xlabel='Anti Lock Braking System', ylabel='count'>
In [119]:
main["Anti Lock Braking System"]
Out[119]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Anti Lock Braking System, Length: 160031, dtype: object
In [120]:
main["Anti Lock Braking System"].unique()
Out[120]:
array(['YES', nan, 'NO', 'OPTIONAL'], dtype=object)
In [121]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti Lock Braking System"])
Out[121]:
<AxesSubplot:xlabel='Anti Lock Braking System', ylabel='count'>
In [122]:
main["Fog Lights - Front"]
Out[122]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Fog Lights - Front, Length: 160031, dtype: object
In [123]:
main["Fog Lights - Front"].unique()
Out[123]:
array(['YES', nan, 'NO', '1039'], dtype=object)
In [124]:
plt.figure(figsize=(15,5))
sns.countplot(main["Fog Lights - Front"])
Out[124]:
<AxesSubplot:xlabel='Fog Lights - Front', ylabel='count'>
In [125]:
main["Fog Lights - Front"]=main["Fog Lights - Front"].apply(lambda x:preprocess_to_null_out(x,False))
In [126]:
plt.figure(figsize=(15,5))
sns.countplot(main["Fog Lights - Front"])
Out[126]:
<AxesSubplot:xlabel='Fog Lights - Front', ylabel='count'>
In [127]:
main["Power Windows Front"]
Out[127]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Power Windows Front, Length: 160031, dtype: object
In [128]:
main["Power Windows Front"].unique()
Out[128]:
array(['YES', nan, 'NO'], dtype=object)
In [129]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows Front"])
Out[129]:
<AxesSubplot:xlabel='Power Windows Front', ylabel='count'>
In [130]:
main["Passenger Airbag"]
Out[130]:
0 YES
1 OPTIONAL
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Passenger Airbag, Length: 160031, dtype: object
In [131]:
main["Passenger Airbag"].unique()
Out[131]:
array(['YES', 'OPTIONAL', nan, 'NO'], dtype=object)
In [132]:
plt.figure(figsize=(15,5))
sns.countplot(main["Passenger Airbag"])
Out[132]:
<AxesSubplot:xlabel='Passenger Airbag', ylabel='count'>
In [133]:
main["Power Steering"]
Out[133]:
0 YES
1 NaN
2 YES
3 YES
4 YES
...
160026 5
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Power Steering, Length: 160031, dtype: object
In [134]:
main["Power Steering"].unique()
Out[134]:
array(['YES', nan, 'NO', '5', '4', '2', '1890', '2923', '506mm', '3200',
'348', '2765'], dtype=object)
In [135]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Steering"])
Out[135]:
<AxesSubplot:xlabel='Power Steering', ylabel='count'>
In [136]:
main["Power Steering"]=main["Power Steering"].apply(lambda x:preprocess_to_null_out(x,False))
In [137]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Steering"])
Out[137]:
<AxesSubplot:xlabel='Power Steering', ylabel='count'>
In [138]:
main["Engine Type"]
Out[138]:
0 NaN
1 G12B
2 K15B Smart Hybrid
3 NaN
4 K10C
...
160026 NaN
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: Engine Type, Length: 160031, dtype: object
In [139]:
main["Engine Type"].unique()
Out[139]:
array([nan, 'G12B', 'K15B Smart Hybrid', 'K10C',
'K15 Smart Hybrid Petrol Engine', 'K Series Dual jet', 'VVT',
'F8D Petrol Engine', 'K15C Smart Hybrid', '1.2 L K Series Engine',
'Multi Point Fuel Injection G12B BS—VI', 'K15C',
'K15C Smart Hybrid`', 'K12N', 'F8D', 'TDCi Diesel Engine',
'Petrol Engine', 'In-Line Engine', 'Ti-VCT Petrol Engine',
'Diesel Engine', 'TDCI Diesel Engine', 'Duratorq Diesel Engine',
'1.5 L TDCi', 'Duratec Petrol Engine', '1.2 litre Petrol Engine',
'5.0L Ti-VCT V8', 'EcoBlue engine', '1.5 litre Diesel Engine',
'1.5 L Petrol Engine', '1.5 L TiVCT', '1.5 L Diesel Engine',
'Ti VCT Petrol Engine', 'Ecoboost Petrol Engine',
'1.2 L Petrol Engine', '1.5L Petrol Engine', '3.0L V6 TFSI',
'2.0 L 40 TFS', '3.0 litre V6 TFSI', 'In Line Petrol Engine', 'V6',
'2.0 L TFSI petrol engine', '55 TFSI quattro tiptronic',
'2.0 L TFSI', 'V8 Twin Turbo Engine', 'TFSI Petrol Engine',
'4.0L TFSI quattro Engine', '3.0 L V6 TFSI Petrol engine',
'Twin Turbocharged V8 Engi',
'4.0 V8 twin-turbocharged petrol engine',
'Twin Turbocharger V8 Engi', 'Twin Turbocharged W12 Eng',
'V8 Engine', 'TwinPower Turbo 6-cylinder engine',
'M TwinPower Turbo inline', 'TwinPower Turbo 4-cylinder',
'TwinPower Turbo 4-Cylinder engine',
'TwinPower Turbo 4 Cylinder Petrol Engine',
'Twin Turbo 4 Cylinder Diesel Engine',
'TwinPower Turbo inline 4-cylinder engine',
'Twin Turbo 6 Cylinder Petrol Engine',
'TwinPower Turbo 6-cylinder',
'TwinPower Turbo inline 6-cylinder engine',
'Twin Power Turbo Engine', '4.4 Litre V8 Twin Turbo Diesel Engine',
'TwinPower Turbo inline 6-cylinder petrol',
'TwinPower Turbo inline 6 cylinder engine',
'BUGATTI 2-Stage Turbochar', 'W16 Engine', 'FM 2.6 CR CD',
'V8 Turbo', '90-degree V8 twin turbo Engine', 'V8 - 90° turbo',
'V12 - 65°', 'V8-90°-turbo', '1.2 i-VTEC',
'Water Cooled Inline i-VTEC DOHC with VTC', 'i-DTEC', 'i-VTEC',
'water cooled inline 4 cylinder',
'Water Cooled Inline i-DTEC DOHC', '1.5 L MPi Petrol',
'1.2 l Kappa dual petrol', '1.1 L Petrol', '1.0 Kappa Turbo GDi',
'1.5 l U2 diesel', 'Kappa 1.0 l turbo GDi', '1.5 L U2 CRDi Diesel',
'Nu 2.0 Petrol', '1.2 l Kappa petrol', '2.0 l petrol MPi engine',
'1.2 Kappa petrol', '1.0 l Turbo GDi petrol', '1.5 l MPi petrol',
'1.5 l diesel CRDi engine', '1.5 U2 CRDi',
'1.0 l Kappa turbo GDi petrol', '1.2 Kappa Petrol',
'1.4 L Turbo GDi Petrol', '1.0 Turbo GDI', '1.5 l U2 CRDi diesel',
'R 2.0 Diesel', '1.0 Turbo GDi petrol', '1.1 L Bi-Fuel',
'1.2 l Bi-fuel', '1.9L Ddi Diesel', 'VGS Turbo Intercooled',
'1.4L MultiAir Petrol', '2.0 L Multijet Diesel',
'2.0L GME T4 DI TC', '2.0L Multijet Diesel',
'2.0 Multijet II Diesel', '1.4 L MultiAir Petrol',
'Smartstream G1.5', 'SMARTSTREAM G1.5', 'Smartstream G1.0 T - GDi',
'Smartstream G1.4', '1.5 L CRDi VGT', '1.5 L CRDi WGT',
'SMARTSTREAM G1.2', 'Smartstream G1.4 T-GDi', 'D2.2L VGT Diesel',
'5.2 V10 Petrol Engine', 'V10 cylinder 90°,dual injection',
'V12, 60°, MPI Petrol Engine', 'V8 bi-turbo engine',
'2.0 Litre P300 Petrol Engine', '3.0 Litre diesel Engine',
'3.0 Litre Petrol Engine', 'petrol Engine',
'3.0 Litre Diesel Engine', 'TD4 Diesel Engine',
'2.5-liter, 4-cyl. in-line', '3.5-literV6FourCam',
'8GR FXS V6 24-valve DOHC with Dual VVT-i', '2 GR FXS',
'2.5-liter L4 engine', 'V8 32-Valve DOHC Dual VVT',
'1.2L Turbo Petrol', 'mStallion', 'mHawk', '2.2L Diesel Engine',
'1.5L Turbo Diesel', '1.5 Litre mHAWK 75 BSVI Diesel Engine',
'mHAWK75 BSVI', '2.0L Turbo Petrol', 'mFALCON G80',
'mStallion 150 TGDi Engine', 'mHAWk100', 'm2DiCR 4 Cyl 2.5L TB',
'2.2 L Turbo Diesel', 'Electric Engine', 'mHawk 130 Engine',
'D15 1.5 Litre Diesel Engine', 'mhawk',
'4.0-litre V8 32 valve Twin Turbo', '3.0L V6 Diesel Engine',
'V-Type Diesel Engine', 'V-Type Engine', '3.0L V6 Petrol Engine',
'V-Type Petrol Engine', 'V Type Petrol Engine', 'V Type Engine',
'3.0 L in-line 6 cylinder engine', '2.0-litre in line engine',
'4.0-litre V8 Biturbo', 'OM654M', 'V12',
'4.0-litre V8 biturbo engine', '4.0-LITER V8 BITURBO ENGINE',
'In-Line 4 Cylinder diesel Engine',
'AMG 2.0-LITRE 4-CYLINDER ENGINE', '3.0-litre L6 in-line engine',
'L4 200', 'AMG 35 4MATIC', 'In-Line 4 Cylinder Petrol Engine',
'In-Line Petrol Engine', 'OM 656 Engine',
'3.0-litre 6-cylinder In-lineTurbo Engine', 'V8',
'3.0-litre diesel', '2.2L V 220d', '3.0-litre petrol',
'M256 engine', 'V6 bi-turbo engine', 'M 264 petrol engine',
'4.0-L V8 BITURBO engine', '3.0-LITRE V6 BITURBO ENGINE',
'L6 diesel Engine', '2.0L V 220 d', 'V8 biturbo engine',
'OM 654 Diesel engine', '2.0-litre OM654 diesel engine',
'4.0-litre petrol', 'L4 200d', '2.0 L Turbocharged Diesel',
'1.5L Turbocharged Intercooled', '1.5 L Turbocharged Intercooled',
'DIESEL 2.0L TURBO', 'DIESEL 2.0L TWIN TURBO',
'2.0L Turbocharged Diesel', '220TURBO', 'VTi-TECH',
'Petroll Engine', 'Petrol', 'Intercooled Turbocharged',
'Intercooled Turbocharger', '4 Cylinder In-Line Petrol',
'DI-Diesel Engine', '2.4 litre 16 Valve Petrol',
'Common Rail DI-D Engine', 'HRA0 1.0 TURBO PETROL',
'B4D 1.0 NA PETROL', '1.3 L HR13DDT Turbo Petrol',
'V6 Twin Turbo Petrol Engine', '1.5 L HR15 Petrol',
'4.0 Litre V6 Turbo Engine', '2.0L Mid-engine',
'Twin Turbo V8 Engine', 'V6 Petrol Engine',
'naturally aspirated boxer engine', 'Twin-Turbocharged Engine',
'4.0L Petrol engine', '3.0 Litre V6 Engine', 'Twin-turbo V8',
'V6 Diesel Engine', '4.0 V8 twin-turbo', 'V8 Petrol Engine',
'1.0L ENERGY', '0.8 SCe', '1.0 SCe', '1.0L TURBO',
'V12 Petrol Engine', '6.75-litre V12 engine', '1.5 L TSI Petrol',
'2.0 L Turbocharged Petrol Engine', '1.0 L TSI Petrol',
'1.0 TSI Petrol Engine', '1.5 TSI Petrol', '1.5 TSI Petrol Engine',
'1.0 TSI Petrol', '2.0 L TSI Petrol Engine',
'2.0L Turbocharged Petrol', 'Kryotec 2.0 L Turbocharged Engine',
'1.2l Turbocharged Revotron Engine', '1.2 L Revotron',
'1.2 l Revotron Engine', 'Revotron 1.2 l BS6 Engine',
'1.2 l Revotron', 'TATA 4SP CR TCIC',
'1.5l Turbocharged Revotorq Engine', '1.5 l Turbocharged Revotorq',
'1.2L Revotron Engine', '1.2 l i-Turbo', '1.2 L i-CNG',
'2.8 L Diesel engine', '2.8 L Diesel Engine', '2.7L Dual VVT',
'2.7L Petrol Engine', '2.7 L Dual VVT',
'2.5L Dynamic Force Engine', 'Gasoline Hybrid and E-FOUR',
'1.0L TSI', 'TSI Petrol Engine', '1.5L TSI EVO with ACT',
'2.0 TSI', 'Twin Turbo & Super Charge Petrol Engine',
'Turbo Petrol Engine'], dtype=object)
In [140]:
# Overall repetition
# Twin Turbocharged V8 Engi == Twin Turbocharger V8 Engi
# TwinPower Turbo 4 Cylinder Petrol Engine == TwinPower Turbo 4-Cylinder engine
# 2.0 L TFSI petrol engine == 2.0 L 40 TFS
# 2.0L Multijet Diesel == 2.0 L Multijet Diesel
# 1.5 l MPi petrol == 1.5 L MPi Petrol
# 1.2 Kappa petrol == 1.2 l Kappa petrol
# SMARTSTREAM G1.5 == Smartstream G1.5
# 3.0 Litre diesel Engine == 3.0 Litre Diesel Engine
# V-Type Petrol Engine == V Type Petrol Engine
# Petroll Engine == petrol Engine == Petrol
# V-Type Engine == V Type Engine
# 4.0-litre V8 biturbo engine == 4.0-LITER V8 BITURBO ENGINE == 4.0-litre V8 Biturbo
# 2.0 L Turbocharged Petrol Engine == 2.0L Turbocharged Petrol
# 2.8 L Diesel engine == 2.8 L Diesel Engine
# 2.7L Dual VVT==2.7 L Dual VVT
# 1.5 TSI Petrol == 1.5 TSI Petrol Engine
# 1.0 TSI Petrol Engine == 1.0 TSI Petrol == 1.0 L TSI Petrol'
In [141]:
def preprocess_Engine_Type(text,varient):
global varient_type,fuel_types
Overall_CNGS=["CNG","ALPG"]
text=str(text)
if text=="nan":
return np.nan
if (Overall_CNGS[0] in varient)or(Overall_CNGS[1] in varient):
if "Petrol" in text:
text=text.replace("Petrol","")
if "`" in text:
text=text.replace("`","")
if ("Petrol" not in text)and("Diesel" not in text)and("CNG" not in text)and("Electric" not in text)and("petrol" not in text)and("diesel" not in text)and("electric" not in text)and("cng" not in text)and("PETROL" not in text):
fuel_type=""
if varient in varient_type:
varient_index=varient_type.index(varient)
fuel_type=fuel_types[varient_index]
else:
if (Overall_CNGS[0] in varient)or(Overall_CNGS[1] in varient):
fuel_type="Cng"
varient_type.append(varient)
fuel_types.append(fuel_type)
else:
fuel_type=main.loc[main["Engine Type"]=="{}".format(text)][["Fuel Type"]].iloc[0,0]
varient_type.append(varient)
fuel_types.append(fuel_type)
fuel_type=fuel_type.title()
text=text.title()
if "Engine" not in text:
text=text+" {}".format(fuel_type)
else:
index_engine=text.find("Engine")
if (text[index_engine-1]==" ")and(text[index_engine]=="E"):
text=text.replace(" E"," {} E".format(fuel_type))
text=text.title()
if (text[text.find("L")-1] != " ")and(text[text.find("L")+1]==" "):
text=text.replace("L"," L")
if "litre" in text:
text=text.title()
if (text[text.find("L")-1] == " ")and(text[text.find("L")+1]==" "):
text=text.replace("L","Litre")
if "Engine" not in text:
text=text+" Engine"
if "-" in text:
text=text.replace("-"," ")
#Direct Handling
if "Liter" in text:
text=text.replace("Liter","Litre")
if "Petroll" in text:
text=text.replace("Petroll","Petrol")
if "Engine" not in text:
text=text.title()+" Engine"
if " " in text:
text=text.replace(" "," ")
if " " in text:
text=text.replace(" "," ")
#Two Exception case:
if text=="1.5 Tsi Petrol Engine":
return "1.5 Litre Tsi Petrol Engine"
if text=="1.0 Tsi Petrol Engine":
return "1.0 Litre Tsi Petrol Engine"
return text
In [142]:
#Maruti brand repetition
# K15C Smart Hybrid == K15C Smart Hybrid`
In [143]:
main.loc[main.Model=="Maruti"][["Engine Type"]]["Engine Type"].unique()
Out[143]:
array([nan, 'G12B', 'K15B Smart Hybrid', 'K10C',
'K15 Smart Hybrid Petrol Engine', 'K Series Dual jet', 'VVT',
'F8D Petrol Engine', 'K15C Smart Hybrid', '1.2 L K Series Engine',
'Multi Point Fuel Injection G12B BS—VI', 'K15C',
'K15C Smart Hybrid`', 'K12N', 'F8D'], dtype=object)
In [144]:
varient_type=[]
fuel_types=[]
_maruti=main.loc[main.Model=="Maruti"][["Engine Type","Varient"]]
In [145]:
_maruti["Engine Type"]=_maruti.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [146]:
_maruti["Engine Type"].unique()
Out[146]:
array([nan, 'G12B Petrol Engine', 'K15B Smart Hybrid Petrol Engine',
'K10C Petrol Engine', 'K15 Smart Hybrid Petrol Engine',
'K Series Dual Jet Petrol Engine', 'Vvt Petrol Engine',
'F8D Cng Engine', 'K15C Smart Hybrid Petrol Engine',
'1.2 Litre K Series Petrol Engine',
'Multi Point Fuel Injection G12B Bs—Vi Petrol Engine',
'K15C Cng Engine', 'K10C Cng Engine',
'Multi Point Fuel Injection G12B Bs—Vi Cng Engine',
'F8D Petrol Engine', 'K Series Dual Jet Cng Engine',
'G12B Cng Engine', 'K12N Petrol Engine'], dtype=object)
In [147]:
main.loc[(main["Model"]=="Maruti") & (main["Fuel Type"]=="CNG")][["Engine Type"]]["Engine Type"].unique()
Out[147]:
array(['F8D Petrol Engine', nan, 'K15C', 'K10C',
'Multi Point Fuel Injection G12B BS—VI', 'K Series Dual jet',
'G12B'], dtype=object)
In [148]:
#Ford brand repetition
# 1.5 litre Diesel Engine == 1.5 L Diesel Engine
# 1.5 L Petrol Engine == 1.5L Petrol Engine
# Ti-VCT Petrol Engine == Ti VCT Petrol Engine
# 1.2 litre Petrol Engine == 1.2 L Petrol Engine
In [149]:
main.loc[main.Model=="Ford"][["Engine Type"]]["Engine Type"].unique()
Out[149]:
array(['TDCi Diesel Engine', 'Petrol Engine', 'In-Line Engine',
'Ti-VCT Petrol Engine', 'Diesel Engine', 'TDCI Diesel Engine',
'Duratorq Diesel Engine', '1.5 L TDCi', 'Duratec Petrol Engine',
nan, '1.2 litre Petrol Engine', '5.0L Ti-VCT V8', 'EcoBlue engine',
'1.5 litre Diesel Engine', '1.5 L Petrol Engine', '1.5 L TiVCT',
'1.5 L Diesel Engine', 'Ti VCT Petrol Engine',
'Ecoboost Petrol Engine', '1.2 L Petrol Engine',
'1.5L Petrol Engine'], dtype=object)
In [150]:
_ford=main.loc[main.Model=="Ford"][["Varient","Engine Type"]]
In [151]:
varient_type=[]
fuel_types=[]
_ford["Engine Type"]=_ford.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [152]:
_ford["Engine Type"].unique()
Out[152]:
array(['Tdci Diesel Engine', 'Petrol Engine', 'In Line Diesel Engine',
'Ti Vct Petrol Engine', 'Diesel Engine', 'Duratorq Diesel Engine',
'1.5 Litre Tdci Diesel Engine', 'Duratec Petrol Engine', nan,
'1.2 Litre Petrol Engine', '5.0 Litre Ti Vct V8 Petrol Engine',
'Ecoblue Diesel Engine', '1.5 Litre Diesel Engine',
'1.5 Litre Petrol Engine', 'In Line Cng Engine',
'1.5 Litre Tivct Petrol Engine', 'Ti Vct Cng Engine',
'Ecoboost Petrol Engine'], dtype=object)
In [153]:
_ford["Engine Type"].unique()
Out[153]:
array(['Tdci Diesel Engine', 'Petrol Engine', 'In Line Diesel Engine',
'Ti Vct Petrol Engine', 'Diesel Engine', 'Duratorq Diesel Engine',
'1.5 Litre Tdci Diesel Engine', 'Duratec Petrol Engine', nan,
'1.2 Litre Petrol Engine', '5.0 Litre Ti Vct V8 Petrol Engine',
'Ecoblue Diesel Engine', '1.5 Litre Diesel Engine',
'1.5 Litre Petrol Engine', 'In Line Cng Engine',
'1.5 Litre Tivct Petrol Engine', 'Ti Vct Cng Engine',
'Ecoboost Petrol Engine'], dtype=object)
In [154]:
#Ford brand repetition
#3.0L V6 TFSI==3.0 litre V6 TFSI
#3.0 litre V6 TFSI == 3.0 L V6 TFSI Petrol engine
#2.0 L TFSI petrol engine==2.0 L TFSI
In [155]:
main.loc[main.Model=="Audi"][["Engine Type"]]["Engine Type"].unique()
Out[155]:
array(['3.0L V6 TFSI', nan, '2.0 L 40 TFS', '3.0 litre V6 TFSI',
'In Line Petrol Engine', 'V6', '2.0 L TFSI petrol engine',
'55 TFSI quattro tiptronic', '2.0 L TFSI', 'V8 Twin Turbo Engine',
'TFSI Petrol Engine', '4.0L TFSI quattro Engine',
'3.0 L V6 TFSI Petrol engine'], dtype=object)
In [156]:
_audi=main.loc[main.Model=="Audi"][["Varient","Engine Type"]]
In [157]:
varient_type=[]
fuel_types=[]
_audi["Engine Type"]=_audi.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [158]:
_audi["Engine Type"].unique()
Out[158]:
array(['3.0 Litre V6 Tfsi Petrol Engine', nan,
'2.0 Litre 40 Tfs Petrol Engine', 'In Line Petrol Engine',
'V6 Petrol Engine', '2.0 Litre Tfsi Petrol Engine',
'55 Tfsi Quattro Tiptronic Petrol Engine',
'V8 Twin Turbo Petrol Engine', 'Tfsi Petrol Engine',
'4.0 Litre Tfsi Quattro Petrol Engine'], dtype=object)
In [159]:
#Bugati brand has no repetition
In [160]:
main.loc[main.Model=="Bugatti"][["Engine Type"]]["Engine Type"].unique()
Out[160]:
array(['BUGATTI 2-Stage Turbochar', 'W16 Engine'], dtype=object)
In [161]:
_bugatti=main.loc[main.Model=="Bugatti"][["Engine Type","Varient"]]
In [162]:
varient_type=[]
fuel_types=[]
_bugatti["Engine Type"]=_bugatti.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [163]:
_bugatti["Engine Type"].unique()
Out[163]:
array(['Bugatti 2 Stage Turbochar Petrol Engine', 'W16 Petrol Engine'],
dtype=object)
In [164]:
#BMW brand has repetition
# TwinPower Turbo 6-cylinder engine == TwinPower Turbo 6-cylinder
# TwinPower Turbo 4-cylinder == TwinPower Turbo 4-Cylinder engine == TwinPower Turbo 4 Cylinder Petrol Engine
# Twin Power Turbo Engine
In [165]:
main.loc[main.Model=="BMW"][["Engine Type"]]["Engine Type"].unique()
Out[165]:
array(['TwinPower Turbo 6-cylinder engine', 'M TwinPower Turbo inline',
'TwinPower Turbo 4-cylinder', 'TwinPower Turbo 4-Cylinder engine',
'TwinPower Turbo 4 Cylinder Petrol Engine', nan,
'Twin Turbo 4 Cylinder Diesel Engine',
'TwinPower Turbo inline 4-cylinder engine',
'Twin Turbo 6 Cylinder Petrol Engine',
'TwinPower Turbo 6-cylinder',
'TwinPower Turbo inline 6-cylinder engine',
'Twin Power Turbo Engine', '4.4 Litre V8 Twin Turbo Diesel Engine',
'TwinPower Turbo inline 6-cylinder petrol',
'TwinPower Turbo inline 6 cylinder engine'], dtype=object)
In [166]:
_bmw=main.loc[main.Model=="BMW"][["Varient","Engine Type"]]
In [167]:
varient_type=[]
fuel_types=[]
_bmw["Engine Type"]=_bmw.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [168]:
_bmw["Engine Type"].unique()
Out[168]:
array(['Twinpower Turbo 6 Cylinder Petrol Engine',
'M Twinpower Turbo Inline Petrol Engine',
'Twinpower Turbo 4 Cylinder Petrol Engine',
'Twinpower Turbo 4 Cylinder Diesel Engine', nan,
'Twin Turbo 4 Cylinder Diesel Engine',
'Twinpower Turbo Inline 4 Cylinder Petrol Engine',
'Twin Turbo 6 Cylinder Petrol Engine',
'Twinpower Turbo Inline 6 Cylinder Petrol Engine',
'Twin Power Turbo Petrol Engine',
'4.4 Litre V8 Twin Turbo Diesel Engine',
'Twinpower Turbo Inline 6 Cylinder Diesel Engine'], dtype=object)
In [169]:
_bmw["Engine Type"].unique()
Out[169]:
array(['Twinpower Turbo 6 Cylinder Petrol Engine',
'M Twinpower Turbo Inline Petrol Engine',
'Twinpower Turbo 4 Cylinder Petrol Engine',
'Twinpower Turbo 4 Cylinder Diesel Engine', nan,
'Twin Turbo 4 Cylinder Diesel Engine',
'Twinpower Turbo Inline 4 Cylinder Petrol Engine',
'Twin Turbo 6 Cylinder Petrol Engine',
'Twinpower Turbo Inline 6 Cylinder Petrol Engine',
'Twin Power Turbo Petrol Engine',
'4.4 Litre V8 Twin Turbo Diesel Engine',
'Twinpower Turbo Inline 6 Cylinder Diesel Engine'], dtype=object)
In [170]:
#Force has no repetition
In [171]:
main.loc[main.Model=="Force"][["Engine Type"]]["Engine Type"].unique()
Out[171]:
array(['FM 2.6 CR CD'], dtype=object)
In [172]:
_force=main.loc[main.Model=="Force"][["Varient","Engine Type"]]
In [173]:
varient_type=[]
fuel_types=[]
_force["Engine Type"]=_force.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [174]:
_force["Engine Type"].unique()
Out[174]:
array(['Fm 2.6 Cr Cd Diesel Engine'], dtype=object)
In [175]:
#Ferrari
#V8 - 90° turbo == V8-90°-turbo
In [176]:
main.loc[main.Model=="Ferrari"][["Engine Type"]]["Engine Type"].unique()
Out[176]:
array(['V8 Turbo', nan, '90-degree V8 twin turbo Engine',
'V8 - 90° turbo', 'V12 - 65°', 'V8-90°-turbo'], dtype=object)
In [177]:
_ferrari=main.loc[main.Model=="Ferrari"][["Varient","Engine Type"]]
In [178]:
varient_type=[]
fuel_types=[]
_ferrari["Engine Type"]=_ferrari.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [179]:
_ferrari["Engine Type"].unique()
Out[179]:
array(['V8 Turbo Petrol Engine', nan,
'90 Degree V8 Twin Turbo Petrol Engine',
'V8 90° Turbo Petrol Engine', 'V12 65° Petrol Engine'],
dtype=object)
In [180]:
#Honda has no repetition
In [181]:
main.loc[main.Model=="Honda"][["Engine Type"]]["Engine Type"].unique()
Out[181]:
array(['1.2 i-VTEC', 'Water Cooled Inline i-VTEC DOHC with VTC', 'i-DTEC',
'i-VTEC', 'water cooled inline 4 cylinder',
'Water Cooled Inline i-DTEC DOHC'], dtype=object)
In [182]:
_honda=main.loc[main.Model=="Honda"][["Varient","Engine Type"]]
In [183]:
varient_type=[]
fuel_types=[]
_honda["Engine Type"]=_honda.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [184]:
_honda["Engine Type"].unique()
Out[184]:
array(['1.2 I Vtec Petrol Engine',
'Water Cooled Inline I Vtec Dohc With Vtc Petrol Engine',
'I Dtec Diesel Engine', 'I Vtec Petrol Engine',
'Water Cooled Inline 4 Cylinder Petrol Engine',
'Water Cooled Inline I Dtec Dohc Diesel Engine'], dtype=object)
In [185]:
#Isuzu has no repetition
In [186]:
main.loc[main.Model=="Isuzu"][["Engine Type"]]["Engine Type"].unique()
Out[186]:
array(['1.9L Ddi Diesel', 'VGS Turbo Intercooled'], dtype=object)
In [187]:
_isuzu=main.loc[main.Model=="Isuzu"][["Varient","Engine Type"]]
In [188]:
varient_type=[]
fuel_types=[]
_isuzu["Engine Type"]=_isuzu.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [189]:
_isuzu["Engine Type"].unique()
Out[189]:
array(['1.9 Litre Ddi Diesel Engine',
'Vgs Turbo Intercooled Diesel Engine'], dtype=object)
In [190]:
# Jeep repetition
# 1.4L MultiAir Petrol == 1.4 L MultiAir Petrol
# 2.0 L Multijet Diesel == 2.0L Multijet Diesel
In [191]:
main.loc[main.Model=="Jeep"][["Engine Type"]]["Engine Type"].unique()
Out[191]:
array(['1.4L MultiAir Petrol', '2.0 L Multijet Diesel',
'2.0L GME T4 DI TC', '2.0L Multijet Diesel', nan,
'2.0 Multijet II Diesel', '1.4 L MultiAir Petrol'], dtype=object)
In [192]:
_jeep=main.loc[main.Model=="Jeep"][["Varient","Engine Type"]]
In [193]:
varient_type=[]
fuel_types=[]
_jeep["Engine Type"]=_jeep.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [194]:
_jeep["Engine Type"].unique()
Out[194]:
array(['1.4 Litre Multiair Petrol Engine',
'2.0 Litre Multijet Diesel Engine',
'2.0 Litre Gme T4 Di Tc Petrol Engine', nan,
'2.0 Multijet Ii Diesel Engine'], dtype=object)
In [195]:
#Kia repetition
#Smartstream G1.5 == SMARTSTREAM G1.5
In [196]:
main.loc[main.Model=="Kia"][["Engine Type"]]["Engine Type"].unique()
Out[196]:
array(['Smartstream G1.5', 'SMARTSTREAM G1.5', 'Smartstream G1.0 T - GDi',
'Smartstream G1.4', '1.5 L CRDi VGT', '1.5 L CRDi WGT',
'SMARTSTREAM G1.2', 'Smartstream G1.4 T-GDi', 'D2.2L VGT Diesel',
nan], dtype=object)
In [197]:
_kia=main.loc[main.Model=="Kia"][["Varient","Engine Type"]]
In [198]:
varient_type=[]
fuel_types=[]
_kia["Engine Type"]=_kia.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [199]:
_kia["Engine Type"].unique()
Out[199]:
array(['Smartstream G1.5 Petrol Engine',
'Smartstream G1.0 T Gdi Petrol Engine',
'Smartstream G1.4 Petrol Engine',
'1.5 Litre Crdi Vgt Diesel Engine',
'1.5 Litre Crdi Wgt Diesel Engine',
'Smartstream G1.2 Petrol Engine',
'Smartstream G1.4 T Gdi Petrol Engine',
'D2.2 Litre Vgt Diesel Engine', nan], dtype=object)
In [200]:
# Lamborghini has no repetition
In [201]:
main.loc[main.Model=="Lamborghini"][["Engine Type"]]["Engine Type"].unique()
Out[201]:
array(['5.2 V10 Petrol Engine', 'V10 cylinder 90°,dual injection',
'V12, 60°, MPI Petrol Engine', 'V8 bi-turbo engine'], dtype=object)
In [202]:
_lamborghini=main.loc[main.Model=="Lamborghini"][["Varient","Engine Type"]]
In [203]:
varient_type=[]
fuel_types=[]
_lamborghini["Engine Type"]=_lamborghini.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [204]:
_lamborghini["Engine Type"].unique()
Out[204]:
array(['5.2 V10 Petrol Engine',
'V10 Cylinder 90°,Dual Injection Petrol Engine',
'V12, 60°, Mpi Petrol Engine', 'V8 Bi Turbo Petrol Engine'],
dtype=object)
In [205]:
# Land_Rover repetition
#3.0 Litre diesel Engine == 3.0 Litre Diesel Engine
In [206]:
main.loc[main.Model=="Land_Rover"][["Engine Type"]]["Engine Type"].unique()
Out[206]:
array(['2.0 Litre P300 Petrol Engine', '3.0 Litre diesel Engine',
'3.0 Litre Petrol Engine', nan, 'petrol Engine',
'3.0 Litre Diesel Engine', 'TD4 Diesel Engine', 'Diesel Engine'],
dtype=object)
In [207]:
_Land_Rover=main.loc[main.Model=="Land_Rover"][["Varient","Engine Type"]]
In [208]:
varient_type=[]
fuel_types=[]
_Land_Rover["Engine Type"]=_Land_Rover.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [209]:
_Land_Rover["Engine Type"].unique()
Out[209]:
array(['2.0 Litre P300 Petrol Engine', '3.0 Litre Diesel Engine',
'3.0 Litre Petrol Engine', nan, 'Petrol Engine',
'Td4 Diesel Engine', 'Diesel Engine'], dtype=object)
In [210]:
# Lexus no repetition
In [211]:
main.loc[main.Model=="Lexus"][["Engine Type"]]["Engine Type"].unique()
Out[211]:
array(['2.5-liter, 4-cyl. in-line', '3.5-literV6FourCam',
'8GR FXS V6 24-valve DOHC with Dual VVT-i', '2 GR FXS',
'2.5-liter L4 engine', 'V8 32-Valve DOHC Dual VVT'], dtype=object)
In [212]:
_Lexus=main.loc[main.Model=="Lexus"][["Varient","Engine Type"]]
In [213]:
varient_type=[]
fuel_types=[]
_Lexus["Engine Type"]=_Lexus.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [214]:
_Lexus["Engine Type"].unique()
Out[214]:
array(['2.5 Litre, 4 Cyl. In Line Petrol Engine',
'3.5 Litrev6Fourcam Petrol Engine',
'8Gr Fxs V6 24 Valve Dohc With Dual Vvt I Petrol Engine',
'2 Gr Fxs Petrol Engine', '2.5 Litre L4 Petrol Engine',
'V8 32 Valve Dohc Dual Vvt Petrol Engine'], dtype=object)
In [215]:
# Mahindra no repetition
# mHawk==mhawk
In [216]:
main.loc[main.Model=="Mahindra"][["Engine Type"]]["Engine Type"].unique()
Out[216]:
array(['1.2L Turbo Petrol', 'mStallion', 'mHawk', '2.2L Diesel Engine',
'1.5L Turbo Diesel', '1.5 Litre mHAWK 75 BSVI Diesel Engine',
'mHAWK75 BSVI', '2.0L Turbo Petrol', 'mFALCON G80',
'mStallion 150 TGDi Engine', 'mHAWk100', 'm2DiCR 4 Cyl 2.5L TB',
'2.2 L Turbo Diesel', 'Electric Engine', 'mHawk 130 Engine',
'D15 1.5 Litre Diesel Engine', 'mhawk'], dtype=object)
In [217]:
_Mahindra=main.loc[main.Model=="Mahindra"][["Varient","Engine Type"]]
In [218]:
varient_type=[]
fuel_types=[]
_Mahindra["Engine Type"]=_Mahindra.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [219]:
_Mahindra["Engine Type"].unique()
Out[219]:
array(['1.2 Litre Turbo Petrol Engine', 'Mstallion Petrol Engine',
'Mhawk Diesel Engine', '2.2 Litre Diesel Engine',
'1.5 Litre Turbo Diesel Engine',
'1.5 Litre Mhawk 75 Bsvi Diesel Engine',
'Mhawk75 Bsvi Diesel Engine', '2.0 Litre Turbo Petrol Engine',
'Mfalcon G80 Petrol Engine', 'Mstallion 150 Tgdi Petrol Engine',
'Mhawk100 Diesel Engine',
'M2Dicr 4 Cyl 2.5 Litre Tb Diesel Engine',
'2.2 Litre Turbo Diesel Engine', 'Electric Engine',
'Mhawk 130 Diesel Engine', 'D15 1.5 Litre Diesel Engine'],
dtype=object)
In [220]:
# Aston_Martin no repetition
In [221]:
main.loc[main.Model=="Aston_Martin"][["Engine Type"]]["Engine Type"].unique()
Out[221]:
array([nan, '4.0-litre V8 32 valve Twin Turbo'], dtype=object)
In [222]:
_Aston_Martin=main.loc[main.Model=="Aston_Martin"][["Varient","Engine Type"]]
In [223]:
varient_type=[]
fuel_types=[]
_Aston_Martin["Engine Type"]=_Aston_Martin.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [224]:
_Aston_Martin["Engine Type"].unique()
Out[224]:
array([nan, '4.0 Litre V8 32 Valve Twin Turbo Diesel Engine'],
dtype=object)
In [225]:
# Maserati no repetition
#check V-Type Engine
#V Type Petrol Engine == V-Type Petrol Engine
In [226]:
main.loc[main.Model=="Maserati"][["Engine Type"]]["Engine Type"].unique()
Out[226]:
array(['3.0L V6 Diesel Engine', 'V-Type Diesel Engine', 'V-Type Engine',
nan, '3.0L V6 Petrol Engine', 'V-Type Petrol Engine',
'V Type Petrol Engine'], dtype=object)
In [227]:
_Maserati=main.loc[main.Model=="Maserati"][["Varient","Engine Type"]]
In [228]:
varient_type=[]
fuel_types=[]
_Maserati["Engine Type"]=_Maserati.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [229]:
_Maserati["Engine Type"].unique()
Out[229]:
array(['3.0 Litre V6 Diesel Engine', 'V Type Diesel Engine',
'V Type Petrol Engine', nan, '3.0 Litre V6 Petrol Engine'],
dtype=object)
In [230]:
#Mercedes-Benz repetition
#4.0-litre V8 Biturbo == 4.0-LITER V8 BITURBO ENGINE == 4.0-litre V8 biturbo engin
#Check OM 656 Engine'
In [231]:
main.loc[main.Model=="Mercedes-Benz"][["Engine Type"]]["Engine Type"].unique()
Out[231]:
array(['V Type Engine', '3.0 L in-line 6 cylinder engine',
'2.0-litre in line engine', nan, '4.0-litre V8 Biturbo', 'OM654M',
'V12', 'Diesel Engine', '4.0-litre V8 biturbo engine',
'4.0-LITER V8 BITURBO ENGINE', 'In-Line 4 Cylinder diesel Engine',
'AMG 2.0-LITRE 4-CYLINDER ENGINE', '3.0-litre L6 in-line engine',
'L4 200', 'AMG 35 4MATIC', 'In-Line 4 Cylinder Petrol Engine',
'In-Line Petrol Engine', 'OM 656 Engine',
'3.0-litre 6-cylinder In-lineTurbo Engine', 'V8',
'3.0-litre diesel', '2.2L V 220d', 'Petrol Engine',
'3.0-litre petrol', 'M256 engine', 'V6 bi-turbo engine',
'M 264 petrol engine', '4.0-L V8 BITURBO engine',
'3.0-LITRE V6 BITURBO ENGINE', 'L6 diesel Engine', '2.0L V 220 d',
'V8 biturbo engine', 'OM 654 Diesel engine',
'2.0-litre OM654 diesel engine', '4.0-litre petrol', 'L4 200d'],
dtype=object)
In [232]:
_Mercedes_Benz=main.loc[main.Model=="Mercedes-Benz"][["Varient","Engine Type"]]
In [233]:
varient_type=[]
fuel_types=[]
_Mercedes_Benz["Engine Type"]=_Mercedes_Benz.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [234]:
_Mercedes_Benz["Engine Type"].unique()
Out[234]:
array(['V Type Diesel Engine',
'3.0 Litre In Litreine 6 Cylinder Diesel Engine',
'2.0 Litre In Line Petrol Engine', nan,
'4.0 Litre V8 Biturbo Petrol Engine', 'Om654M Diesel Engine',
'V12 Petrol Engine', 'Diesel Engine',
'In Line 4 Cylinder Diesel Engine',
'Amg 2.0 Litre 4 Cylinder Petrol Engine',
'3.0 Litre L6 In Line Petrol Engine', 'L4 200 Petrol Engine',
'Amg 35 4Matic Petrol Engine', 'In Line 4 Cylinder Petrol Engine',
'In Line Petrol Engine', 'Om 656 Diesel Engine',
'3.0 Litre 6 Cylinder In Lineturbo Petrol Engine',
'V8 Petrol Engine', '3.0 Litre Diesel Engine',
'2.2 Litre V 220D Diesel Engine', 'Petrol Engine',
'3.0 Litre Petrol Engine', 'M256 Petrol Engine',
'V6 Bi Turbo Petrol Engine', 'M 264 Petrol Engine',
'3.0 Litre V6 Biturbo Petrol Engine', 'L6 Diesel Engine',
'2.0 Litre V 220 D Diesel Engine', 'V8 Biturbo Petrol Engine',
'Om 654 Diesel Engine', '2.0 Litre Om654 Diesel Engine',
'4.0 Litre Petrol Engine', 'L4 200D Diesel Engine'], dtype=object)
In [235]:
#MG repetition
#2.0 L Turbocharged Diesel == 2.0L Turbocharged Diesel
#1.5L Turbocharged Intercooled == 1.5 L Turbocharged Intercooled
In [236]:
main.loc[main.Model=="MG"][["Engine Type"]]["Engine Type"].unique()
Out[236]:
array(['2.0 L Turbocharged Diesel', '1.5L Turbocharged Intercooled',
'1.5 L Turbocharged Intercooled', 'DIESEL 2.0L TURBO',
'DIESEL 2.0L TWIN TURBO', '2.0L Turbocharged Diesel', '220TURBO',
'VTi-TECH', nan], dtype=object)
In [237]:
_MG=main.loc[main.Model=="MG"][["Varient","Engine Type"]]
In [238]:
varient_type=[]
fuel_types=[]
_MG["Engine Type"]=_MG.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [239]:
_MG["Engine Type"].unique()
Out[239]:
array(['2.0 Litre Turbocharged Diesel Engine',
'1.5 Litre Turbocharged Intercooled Petrol Engine',
'Diesel 2.0 Litre Turbo Diesel Engine',
'Diesel 2.0 Litre Twin Turbo Diesel Engine',
'220Turbo Petrol Engine', 'Vti Tech Petrol Engine', nan],
dtype=object)
In [240]:
#Mini repetition
#Petrol Engine == Petroll Engine
In [241]:
main.loc[main.Model=="Mini"][["Engine Type"]]["Engine Type"].unique()
Out[241]:
array(['Petrol Engine', nan, 'Petroll Engine'], dtype=object)
In [242]:
_Mini=main.loc[main.Model=="Mini"][["Varient","Engine Type"]]
In [243]:
varient_type=[]
fuel_types=[]
_Mini["Engine Type"]=_Mini.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [244]:
_Mini["Engine Type"].unique()
Out[244]:
array(['Petrol Engine', nan], dtype=object)
In [245]:
#Mitsubishi repetition
#Petrol Engine == Petroll Engine
#check Intercooled Turbocharged == Intercooled Turbocharger
In [246]:
main.loc[main.Model=="Mitsubishi"][["Engine Type"]]["Engine Type"].unique()
Out[246]:
array(['In-Line Engine', 'Petrol', nan, 'Intercooled Turbocharged',
'Intercooled Turbocharger', '4 Cylinder In-Line Petrol',
'DI-Diesel Engine', '2.4 litre 16 Valve Petrol',
'Common Rail DI-D Engine'], dtype=object)
In [247]:
_Mitsubishi=main.loc[main.Model=="Mitsubishi"][["Varient","Engine Type"]]
In [248]:
varient_type=[]
fuel_types=[]
_Mitsubishi["Engine Type"]=_Mitsubishi.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [249]:
_Mitsubishi["Engine Type"].unique()
Out[249]:
array(['In Line Diesel Engine', 'Petrol Engine', 'In Line Cng Engine',
nan, 'Intercooled Turbocharged Petrol Engine',
'Intercooled Turbocharger Diesel Engine',
'4 Cylinder In Line Petrol Engine', 'Di Diesel Engine',
'2.4 Litre 16 Valve Petrol Engine',
'Common Rail Di D Diesel Engine'], dtype=object)
In [250]:
#Nissan has no repetition
In [251]:
main.loc[main.Model=="Nissan"][["Engine Type"]]["Engine Type"].unique()
Out[251]:
array(['HRA0 1.0 TURBO PETROL', 'B4D 1.0 NA PETROL',
'1.3 L HR13DDT Turbo Petrol', 'V6 Twin Turbo Petrol Engine',
'1.5 L HR15 Petrol'], dtype=object)
In [252]:
_Nissan=main.loc[main.Model=="Nissan"][["Varient","Engine Type"]]
In [253]:
varient_type=[]
fuel_types=[]
_Nissan["Engine Type"]=_Nissan.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [254]:
_Nissan["Engine Type"].unique()
Out[254]:
array(['Hra0 1.0 Turbo Petrol Engine', 'B4D 1.0 Na Petrol Engine',
'1.3 Litre Hr13Ddt Turbo Petrol Engine',
'V6 Twin Turbo Petrol Engine', '1.5 Litre Hr15 Petrol Engine'],
dtype=object)
In [255]:
#Porsche has no repetition
#Twin Turbo V8 Engine == Twin-turbo V8
In [256]:
main.loc[main.Model=="Porsche"][["Engine Type"]]["Engine Type"].unique()
Out[256]:
array(['Petrol Engine', nan, '4.0 Litre V6 Turbo Engine',
'2.0L Mid-engine', 'Twin Turbo V8 Engine', 'V6 Petrol Engine',
'naturally aspirated boxer engine', 'Twin-Turbocharged Engine',
'4.0L Petrol engine', '3.0 Litre V6 Engine', 'Twin-turbo V8',
'V6 Diesel Engine', '4.0 V8 twin-turbo', 'V8 Petrol Engine'],
dtype=object)
In [257]:
_Porsche=main.loc[main.Model=="Porsche"][["Varient","Engine Type"]]
In [258]:
varient_type=[]
fuel_types=[]
_Porsche["Engine Type"]=_Porsche.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [259]:
_Porsche["Engine Type"].unique()
Out[259]:
array(['Petrol Engine', nan, '4.0 Litre V6 Turbo Petrol Engine',
'2.0 Litre Mid Engine', 'Twin Turbo V8 Petrol Engine',
'V6 Petrol Engine', 'Naturally Aspirated Boxer Petrol Engine',
'Twin Turbocharged Petrol Engine', '4.0 Litre Petrol Engine',
'3.0 Litre V6 Petrol Engine', 'V6 Diesel Engine',
'4.0 V8 Twin Turbo Petrol Engine', 'V8 Petrol Engine'],
dtype=object)
In [260]:
#Renault has no repetition
In [261]:
main.loc[main.Model=="Renault"][["Engine Type"]]["Engine Type"].unique()
Out[261]:
array(['1.0L ENERGY', nan, '0.8 SCe', '1.0 SCe', '1.0L TURBO'],
dtype=object)
In [262]:
_Renault=main.loc[main.Model=="Renault"][["Varient","Engine Type"]]
In [263]:
varient_type=[]
fuel_types=[]
_Renault["Engine Type"]=_Renault.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [264]:
_Renault["Engine Type"].unique()
Out[264]:
array(['1.0 Litre Energy Petrol Engine', nan, '0.8 Sce Petrol Engine',
'1.0 Sce Petrol Engine', '1.0 Litre Turbo Petrol Engine'],
dtype=object)
In [265]:
#Rolls-Royce has no repetition
In [266]:
main.loc[main.Model=="Rolls-Royce"][["Engine Type"]]["Engine Type"].unique()
Out[266]:
array(['V12 Petrol Engine', nan, 'V Type Engine', '6.75-litre V12 engine'],
dtype=object)
In [267]:
_Rolls_Royce=main.loc[main.Model=="Rolls-Royce"][["Varient","Engine Type"]]
In [268]:
varient_type=[]
fuel_types=[]
_Rolls_Royce["Engine Type"]=_Rolls_Royce.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [269]:
_Rolls_Royce["Engine Type"].unique()
Out[269]:
array(['V12 Petrol Engine', nan, 'V Type Diesel Engine',
'6.75 Litre V12 Petrol Engine'], dtype=object)
In [270]:
#Skoda has no repetition
#1.5 L TSI Petrol==1.5 TSI Petrol Engine==1.5 TSI Petrol Engine
#2.0 L Turbocharged Petrol Engine==2.0L Turbocharged Petrol
#1.0 L TSI Petrol==1.0 TSI Petrol Engine
In [271]:
main.loc[main.Model=="Skoda"][["Engine Type"]]["Engine Type"].unique()
Out[271]:
array(['1.5 L TSI Petrol', '2.0 L Turbocharged Petrol Engine',
'1.0 L TSI Petrol', '1.0 TSI Petrol Engine', '1.5 TSI Petrol',
'1.5 TSI Petrol Engine', '1.0 TSI Petrol',
'2.0 L TSI Petrol Engine', '2.0L Turbocharged Petrol'],
dtype=object)
In [272]:
_Skoda=main.loc[main.Model=="Skoda"][["Varient","Engine Type"]]
In [273]:
varient_type=[]
fuel_types=[]
_Skoda["Engine Type"]=_Skoda.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [274]:
_Skoda["Engine Type"].unique()
Out[274]:
array(['1.5 Litre Tsi Petrol Engine',
'2.0 Litre Turbocharged Petrol Engine',
'1.0 Litre Tsi Petrol Engine', '2.0 Litre Tsi Petrol Engine'],
dtype=object)
In [275]:
#Tata repetition
#1.2 L Revotron == 1.2 l Revotron Engine == 1.2 l Revotron == 1.2L Revotron Engine
#1.5l Turbocharged Revotorq Engine == 1.5 l Turbocharged Revotorq
In [276]:
main.loc[main.Model=="Tata"][["Engine Type"]]["Engine Type"].unique()
Out[276]:
array(['Kryotec 2.0 L Turbocharged Engine',
'1.2l Turbocharged Revotron Engine', '1.2 L Revotron',
'1.2 l Revotron Engine', 'Revotron 1.2 l BS6 Engine', nan,
'1.2 l Revotron', 'TATA 4SP CR TCIC',
'1.5l Turbocharged Revotorq Engine', '1.5 l Turbocharged Revotorq',
'1.2L Revotron Engine', '1.2 l i-Turbo', '1.2 L i-CNG'],
dtype=object)
In [277]:
_Tata=main.loc[main.Model=="Tata"][["Varient","Engine Type"]]
In [278]:
varient_type=[]
fuel_types=[]
_Tata["Engine Type"]=_Tata.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [279]:
_Tata["Engine Type"].unique()
Out[279]:
array(['Kryotec 2.0 Litre Turbocharged Diesel Engine',
'1.2 Litre Turbocharged Revotron Petrol Engine',
'1.2 Litre Revotron Petrol Engine',
'Revotron 1.2 Litre Bs6 Petrol Engine', nan,
'Tata 4Sp Cr Tcic Diesel Engine',
'1.5 Litre Turbocharged Revotorq Diesel Engine',
'1.2 Litre Revotron Cng Engine', '1.2 Litre I Turbo Petrol Engine',
'1.2 Litre I Cng Engine'], dtype=object)
In [280]:
#Toyota repetition
#2.8 L Diesel engine == 2.8 L Diesel Engine
#2.7L Dual VVT == 2.7 L Dual VVT
In [281]:
main.loc[main.Model=="Toyota"][["Engine Type"]]["Engine Type"].unique()
Out[281]:
array(['2.8 L Diesel engine', '2.8 L Diesel Engine', '2.7L Dual VVT',
'1.2 L Petrol Engine', nan, '2.7L Petrol Engine', '2.7 L Dual VVT',
'2.5L Dynamic Force Engine', 'Gasoline Hybrid and E-FOUR'],
dtype=object)
In [282]:
_Toyota=main.loc[main.Model=="Toyota"][["Varient","Engine Type"]]
In [283]:
varient_type=[]
fuel_types=[]
_Toyota["Engine Type"]=_Toyota.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [284]:
_Toyota["Engine Type"].unique()
Out[284]:
array(['2.8 Litre Diesel Engine', '2.7 Litre Dual Vvt Petrol Engine',
'1.2 Litre Petrol Engine', nan, '2.7 Litre Petrol Engine',
'2.5 Litre Dynamic Force Petrol Engine',
'Gasoline Hybrid And E Four Petrol Engine'], dtype=object)
In [285]:
#Volkswagen has no repetition
In [286]:
main.loc[main.Model=="Volkswagen"][["Engine Type"]]["Engine Type"].unique()
Out[286]:
array(['1.0L TSI', 'TSI Petrol Engine', '1.5L TSI EVO with ACT',
'2.0 TSI'], dtype=object)
In [287]:
_Volkswagen=main.loc[main.Model=="Volkswagen"][["Varient","Engine Type"]]
In [288]:
varient_type=[]
fuel_types=[]
_Volkswagen["Engine Type"]=_Volkswagen.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [289]:
_Volkswagen["Engine Type"].unique()
Out[289]:
array(['1.0 Litre Tsi Petrol Engine', 'Tsi Petrol Engine',
'1.5 Litre Tsi Evo With Act Petrol Engine',
'2.0 Tsi Petrol Engine'], dtype=object)
In [290]:
#Volvo has no repetition
In [291]:
main.loc[main.Model=="Volvo"][["Engine Type"]]["Engine Type"].unique()
Out[291]:
array(['Twin Turbo & Super Charge Petrol Engine', nan,
'Turbo Petrol Engine'], dtype=object)
In [292]:
_Volvo=main.loc[main.Model=="Volvo"][["Varient","Engine Type"]]
In [293]:
varient_type=[]
fuel_types=[]
_Volvo["Engine Type"]=_Volvo.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [294]:
_Volvo["Engine Type"].unique()
Out[294]:
array(['Twin Turbo & Super Charge Petrol Engine', nan,
'Turbo Petrol Engine'], dtype=object)
In [295]:
#Apply the preprocess_Engine_Type function to make transformation
varient_type=[]
fuel_types=[]
main["Engine Type"]=main.apply(lambda x:preprocess_Engine_Type(x["Engine Type"],x["Varient"]),axis=1)
In [296]:
main[["Model","Varient","Engine Type","Fuel Type"]].loc[main["Fuel Type"]=="CNG"]
Out[296]:
| Model | Varient | Engine Type | Fuel Type | |
|---|---|---|---|---|
| 10 | Maruti | Alto 800 LXI Opt S-CNG | F8D Cng Engine | CNG |
| 15 | Maruti | Swift Dzire Tour S CNG (O) | NaN | CNG |
| 21 | Maruti | Ertiga ZXI CNG | K15C Cng Engine | CNG |
| 45 | Maruti | Wagon R VXI CNG | K10C Cng Engine | CNG |
| 48 | Maruti | Super Carry STD CNG | Multi Point Fuel Injection G12B Bs—Vi Cng Engine | CNG |
| ... | ... | ... | ... | ... |
| 154723 | Tata | Tigor XZ CNG | 1.2 Litre Revotron Cng Engine | CNG |
| 154733 | Tata | Tigor XZ Plus CNG | 1.2 Litre Revotron Cng Engine | CNG |
| 154736 | Tata | Tigor XM CNG | 1.2 Litre Revotron Cng Engine | CNG |
| 154751 | Tata | Tiago XZ Plus CNG | 1.2 Litre I Cng Engine | CNG |
| 154772 | Tata | Tigor XZ Plus CNG | 1.2 Litre Revotron Cng Engine | CNG |
3888 rows × 4 columns
In [297]:
main.columns[28]
Out[297]:
'Displacement (cc)'
In [298]:
main["Displacement (cc)"]
Out[298]:
0 1197.0
1 1196.0
2 1462.0
3 1197.0
4 998.0
...
160026 NaN
160027 1969.0
160028 1969.0
160029 1969.0
160030 1969.0
Name: Displacement (cc), Length: 160031, dtype: float64
In [299]:
main.columns[29]
Out[299]:
'Max Power'
In [300]:
main["Max Power"]
Out[300]:
0 88.50bhp@6000rpm
1 72.41bhp@6000rpm
2 103.25bhp@6000rpm
3 88.50bhp@6000rpm
4 65.71bhp@5500rpm
...
160026 402.30bhp
160027 NaN
160028 246.58Bhp
160029 NaN
160030 NaN
Name: Max Power, Length: 160031, dtype: object
In [301]:
#Max power is same as the Max Power (bhp@rpm) so dropit,actually this information is present in both section
main.drop(["Max Power"],axis=1,inplace=True)
In [302]:
main.columns[29]
Out[302]:
'Max Torque'
In [303]:
main["Max Torque"]
Out[303]:
0 113Nm@4400rpm
1 98Nm@3000rpm
2 138Nm@4400rpm
3 113Nm@4400rpm
4 89Nm@3500rpm
...
160026 660Nm
160027 NaN
160028 350Nm
160029 NaN
160030 NaN
Name: Max Torque, Length: 160031, dtype: object
In [304]:
main.drop(["Max Torque"],axis=1,inplace=True)
In [305]:
main.columns[29]
Out[305]:
'Valves Per Cylinder'
In [306]:
main["Valves Per Cylinder"]
Out[306]:
0 4.0
1 4.0
2 4.0
3 4.0
4 4.0
...
160026 NaN
160027 4.0
160028 4.0
160029 4.0
160030 4.0
Name: Valves Per Cylinder, Length: 160031, dtype: float64
In [307]:
main.columns[30]
Out[307]:
'Gear Box'
In [308]:
main["Gear Box"]
Out[308]:
0 5 Speed
1 5 Speed
2 5 Speed
3 5 Speed
4 5 Speed
...
160026 single speed transmission
160027 NaN
160028 8-speed
160029 NaN
160030 NaN
Name: Gear Box, Length: 160031, dtype: object
In [309]:
main["Gear Box"].unique()
Out[309]:
array(['5 Speed', '5-Speed', '4 Speed', '6-Speed', 'AGS', nan, '6 Speed',
'6 Speed Automatic', '10 speed', '10 Speed', '7-speed Stronic',
'7-speed DCT', '7 Speed', '8 Speed',
'7 speed S-tronic transmission', '8-speed tiptronic/automatic',
'8-speed tiptronic', '8 speed', '8-Speed Steptronic', '8-Speed',
'7-Speed', '7-Speed Steptronic',
'8-Speed Steptronic Sport Automatic Transmission', 'Single speed',
'8-Speed Automatic Transmission', '8-speed Steptronic Automatic',
'8-speed M-Steptronic', '8-speed DCT', '8-speed', 'CVT',
'6-speed IVT', '7-Speed DCT', 'iMT', '7 Speed DCT', '6-speed',
'6 Speed IMT', '6 Speed IVT', 'IVT', '6-speed iMT', '9-Speed',
'9 Speed', '6-Speed iMT', '7 Speed dual clutch transmission',
'7 Speed LDF DCT', '8', 'E-CVT', '6-speed AutoSHIFT',
'Mercedes Benz 7 Speed Automatic', 'Fully Automatic',
'AMG Speedshift 9G TCT Automatic', '9-speed automatic',
'9G-TRONIC', 'AMG 8 Speed DCT', 'Single-speed transmission',
'MCT 9-Speed', '9G TRONIC', '9-speed', 'AMG MCT 9G Sport',
'AMG 7-SPEED DCT', 'AMG TCT 9G', 'AMG SPEEDSHIFT DCT 8G',
'SPEEDSHIFT TCT 9G', '9G-TRONIC automatic',
'SPEEDSHIFT TCT 9-speed', '9 speed Tronic',
'7-Speed DCT dual-clutch', '8G-DCT', 'AMG SPEEDSHIFT MCT 9G',
'7 Speed 9G-Tronic automatic', '7G-DCT', '8-Speed DCT',
'6-speed CVT', '8 Speed CVT', '8-speed Steptronic Transmission',
'7-Speed DCT Steptronic', 'Six Speed Manual with Paddle Shifter',
'6', '7-speed PDK', '2-speed transmission', '8-speed Tiptronic S',
'8 SpeedPDK', '8-speed Porsche Doppelkupplung',
'8-speed Tiptronic S with shift-by-wire', '7-Speed DSG', '7-speed',
'7-speed DSG', '6-Speed DCT', '6 Speed iMT',
'6 Speed with Sequential Shift', '8Speed',
'single speed transmission'], dtype=object)
In [310]:
def preprocess_gear_box(text):
list_of_words_to_remove=[" Automatic"," Transmission","/Automatic"]
Activation=False
text=str(text)
if text=="nan":
return np.nan
text=text.title()
if "G" in text:
try:
int(text[text.find("G")-1])
get_index=text.find("G")
text=text.replace("G","")
Activation=True
except:
pass
if "-" in text:
try:
pre=int(text[text.find("-")-1])
next_=str(text[text.find("-")+1])
text=text.replace("-"," ")
except:
text=text.replace("-","")
text=text.title()
for i in list_of_words_to_remove:
if i in text:
text=text.replace("{}".format(i),"")
if len(text)==1:
try:
checking=int(text)
text=text+" Speed"
except:
pass
try:
int(text[0])
if text[1]!=" ":
text=text[:1]+"-"+text[1:]
except:
pass
if "-" in text:
text=text.replace("-"," ")
if Activation==True:
text=text[:get_index]+"G"+text[get_index:]
return text
In [311]:
#Maruti
In [312]:
main.loc[main["Model"]=="Maruti"][["Gear Box"]]["Gear Box"].unique()
Out[312]:
array(['5 Speed', '5-Speed', '4 Speed', '6-Speed', 'AGS', nan],
dtype=object)
In [313]:
com=main.loc[main["Model"]=="Maruti"]["Gear Box"].apply(preprocess_gear_box)
In [314]:
com.unique()
Out[314]:
array(['5 Speed', '4 Speed', '6 Speed', 'Ags', 'Nan'], dtype=object)
In [315]:
#Ford
In [316]:
main.loc[main["Model"]=="Ford"][["Gear Box"]]["Gear Box"].unique()
Out[316]:
array(['5 Speed', '6 Speed', nan, '5-Speed', '6 Speed Automatic',
'10 speed', '10 Speed'], dtype=object)
In [317]:
com=main.loc[main["Model"]=="Ford"]["Gear Box"].apply(preprocess_gear_box)
In [318]:
com.unique()
Out[318]:
array(['5 Speed', '6 Speed', 'Nan', '1 0 Speed'], dtype=object)
In [319]:
#Audi
In [320]:
main.loc[main["Model"]=="Audi"][["Gear Box"]]["Gear Box"].unique()
Out[320]:
array([nan, '7-speed Stronic', '7-speed DCT', '7 Speed', '8 Speed',
'7 speed S-tronic transmission', '8-speed tiptronic/automatic',
'8-speed tiptronic'], dtype=object)
In [321]:
com=main.loc[main["Model"]=="Audi"]["Gear Box"].apply(preprocess_gear_box)
In [322]:
com.unique()
Out[322]:
array(['Nan', '7 Speed Stronic', '7 Speed Dct', '7 Speed', '8 Speed',
'8 Speed Tiptronic'], dtype=object)
In [323]:
#Bentley
In [324]:
main.loc[main["Model"]=="Bentley"][["Gear Box"]]["Gear Box"].unique()
Out[324]:
array(['8 Speed', '8 speed'], dtype=object)
In [325]:
com=main.loc[main["Model"]=="Bentley"]["Gear Box"].apply(preprocess_gear_box)
In [326]:
com.unique()
Out[326]:
array(['8 Speed'], dtype=object)
In [327]:
#Force
In [328]:
main.loc[main["Model"]=="Force"][["Gear Box"]]["Gear Box"].unique()
Out[328]:
array(['5 Speed'], dtype=object)
In [329]:
com=main.loc[main["Model"]=="Force"]["Gear Box"].apply(preprocess_gear_box)
In [330]:
com.unique()
Out[330]:
array(['5 Speed'], dtype=object)
In [331]:
#Ferrari
In [332]:
main.loc[main["Model"]=="Ferrari"][["Gear Box"]]["Gear Box"].unique()
Out[332]:
array(['7 Speed', '8-speed DCT', '8 speed', nan, '8-speed'], dtype=object)
In [333]:
com=main.loc[main["Model"]=="Ferrari"]["Gear Box"].apply(preprocess_gear_box)
In [334]:
com.unique()
Out[334]:
array(['7 Speed', '8 Speed Dct', '8 Speed', 'Nan'], dtype=object)
In [335]:
#Honda
In [336]:
main.loc[main["Model"]=="Honda"][["Gear Box"]]["Gear Box"].unique()
Out[336]:
array(['7 Speed', '6 Speed', 'CVT', '5 Speed', nan, '5-Speed'],
dtype=object)
In [337]:
com=main.loc[main["Model"]=="Honda"]["Gear Box"].apply(preprocess_gear_box)
In [338]:
com.unique()
Out[338]:
array(['7 Speed', '6 Speed', 'Cvt', '5 Speed', 'Nan'], dtype=object)
In [339]:
#Hyundai
In [340]:
main.loc[main["Model"]=="Hyundai"][["Gear Box"]]["Gear Box"].unique()
Out[340]:
array(['6-speed IVT', '5-Speed', '5 Speed', '7-Speed DCT', '6 Speed',
'iMT', '7 Speed DCT', '6-speed', '6-Speed', '6 Speed IMT',
'6 Speed IVT', 'IVT', '7-speed DCT', '8-Speed', '7 Speed',
'6-speed iMT'], dtype=object)
In [341]:
com=main.loc[main["Model"]=="Hyundai"]["Gear Box"].apply(preprocess_gear_box)
In [342]:
com.unique()
Out[342]:
array(['6 Speed Ivt', '5 Speed', '7 Speed Dct', '6 Speed', 'Imt',
'6 Speed Imt', 'Ivt', '8 Speed', '7 Speed'], dtype=object)
In [343]:
#Isuzu
In [344]:
main.loc[main["Model"]=="Isuzu"][["Gear Box"]]["Gear Box"].unique()
Out[344]:
array(['6-Speed', '6 Speed'], dtype=object)
In [345]:
com=main.loc[main["Model"]=="Isuzu"]["Gear Box"].apply(preprocess_gear_box)
In [346]:
com.unique()
Out[346]:
array(['6 Speed'], dtype=object)
In [347]:
#Jeep
In [348]:
main.loc[main["Model"]=="Jeep"][["Gear Box"]]["Gear Box"].unique()
Out[348]:
array(['7 Speed DCT', '9-Speed', '7 Speed', '6-Speed', '8 Speed',
'6 Speed', nan, '9 Speed', '7-Speed DCT'], dtype=object)
In [349]:
com=main.loc[main["Model"]=="Jeep"]["Gear Box"].apply(preprocess_gear_box)
In [350]:
com.unique()
Out[350]:
array(['7 Speed Dct', '9 Speed', '7 Speed', '6 Speed', '8 Speed', 'Nan'],
dtype=object)
In [351]:
#Kia
In [352]:
main.loc[main["Model"]=="Kia"][["Gear Box"]]["Gear Box"].unique()
Out[352]:
array(['6-Speed', '7-Speed DCT', '6-Speed iMT', '5-Speed', nan, '8-Speed',
'IVT'], dtype=object)
In [353]:
com=main.loc[main["Model"]=="Kia"]["Gear Box"].apply(preprocess_gear_box)
In [354]:
com.unique()
Out[354]:
array(['6 Speed', '7 Speed Dct', '6 Speed Imt', '5 Speed', 'Nan',
'8 Speed', 'Ivt'], dtype=object)
In [355]:
#Lamborghini
In [356]:
main.loc[main["Model"]=="Lamborghini"][["Gear Box"]]["Gear Box"].unique()
Out[356]:
array(['7 Speed dual clutch transmission', '7 Speed LDF DCT', '7 Speed',
'8'], dtype=object)
In [357]:
com=main.loc[main["Model"]=="Lamborghini"]["Gear Box"].apply(preprocess_gear_box)
In [358]:
com.unique()
Out[358]:
array(['7 Speed Dual Clutch', '7 Speed Ldf Dct', '7 Speed', '8 Speed'],
dtype=object)
In [359]:
#Land_Rover
In [360]:
main.loc[main["Model"]=="Land_Rover"][["Gear Box"]]["Gear Box"].unique()
Out[360]:
array(['8-Speed Automatic Transmission', nan, '8 Speed', '9 Speed'],
dtype=object)
In [361]:
com=main.loc[main["Model"]=="Land_Rover"]["Gear Box"].apply(preprocess_gear_box)
In [362]:
com.unique()
Out[362]:
array(['8 Speed', 'Nan', '9 Speed'], dtype=object)
In [363]:
#Lexus
In [364]:
main.loc[main["Model"]=="Lexus"][["Gear Box"]]["Gear Box"].unique()
Out[364]:
array([nan, '10 Speed', '10 speed', '8 Speed', 'E-CVT'], dtype=object)
In [365]:
com=main.loc[main["Model"]=="Lexus"]["Gear Box"].apply(preprocess_gear_box)
In [366]:
com.unique()
Out[366]:
array(['Nan', '1 0 Speed', '8 Speed', 'Ecvt'], dtype=object)
In [367]:
#Mahindra
In [368]:
main.loc[main["Model"]=="Mahindra"][["Gear Box"]]["Gear Box"].unique()
Out[368]:
array(['6-speed AutoSHIFT', '6-Speed', '6 Speed',
'Mercedes Benz 7 Speed Automatic', nan, '5 Speed', '5-Speed',
'6-speed', 'Fully Automatic'], dtype=object)
In [369]:
com=main.loc[main["Model"]=="Mahindra"]["Gear Box"].apply(preprocess_gear_box)
In [370]:
com.unique()
Out[370]:
array(['6 Speed Autoshift', '6 Speed', 'Mercedes Benz 7 Speed', 'Nan',
'5 Speed', 'Fully'], dtype=object)
In [371]:
#Aston_Martin
In [372]:
main.loc[main["Model"]=="Aston_Martin"][["Gear Box"]]["Gear Box"].unique()
Out[372]:
array(['AMG Speedshift 9G TCT Automatic', '9-speed automatic'],
dtype=object)
In [373]:
com=main.loc[main["Model"]=="Aston_Martin"]["Gear Box"].apply(preprocess_gear_box)
In [374]:
com.unique()
Out[374]:
array(['Amg Speedshift 9G Tct', '9 Speed'], dtype=object)
In [375]:
#Maserati
In [376]:
main.loc[main["Model"]=="Maserati"][["Gear Box"]]["Gear Box"].unique()
Out[376]:
array(['8 Speed', nan, '6 Speed'], dtype=object)
In [377]:
com=main.loc[main["Model"]=="Maserati"]["Gear Box"].apply(preprocess_gear_box)
In [378]:
com.unique()
Out[378]:
array(['8 Speed', 'Nan', '6 Speed'], dtype=object)
In [379]:
#Mercedes-Benz
In [380]:
main.loc[main["Model"]=="Mercedes-Benz"][["Gear Box"]]["Gear Box"].unique()
Out[380]:
array(['9G-TRONIC', '9-Speed', 'AMG 8 Speed DCT',
'Single-speed transmission', 'MCT 9-Speed', '9G TRONIC', '9-speed',
'AMG MCT 9G Sport', nan, '9 Speed', 'AMG 7-SPEED DCT',
'AMG TCT 9G', '7-Speed DCT', 'AMG SPEEDSHIFT DCT 8G',
'AMG Speedshift 9G TCT Automatic', 'SPEEDSHIFT TCT 9G',
'9G-TRONIC automatic', '7 Speed', 'SPEEDSHIFT TCT 9-speed',
'9 speed Tronic', '7-Speed DCT dual-clutch', '8G-DCT',
'AMG SPEEDSHIFT MCT 9G', '7 Speed 9G-Tronic automatic', '7G-DCT',
'8-Speed DCT'], dtype=object)
In [381]:
com=main.loc[main["Model"]=="Mercedes-Benz"]["Gear Box"].apply(preprocess_gear_box)
In [382]:
com.unique()
Out[382]:
array(['9G Tronic', '9 Speed', 'Amg 8 Speed Dct', 'Singlespeed',
'Mct 9 Speed', 'Amg Mct 9G Sport', 'Nan', 'Amg 7 Speed Dct',
'Amg Tct 9G', '7 Speed Dct', 'Amg Speedshift Dct 8G',
'Amg Speedshift 9G Tct', 'Speedshift Tct 9G', '7 Speed',
'Speedshift Tct 9 Speed', '9 Speed Tronic',
'7 Speed Dct Dual Clutch', '8G Dct', 'Amg Speedshift Mct 9G',
'7 Speed 9G Tronic', '7G Dct', '8 Speed Dct'], dtype=object)
In [383]:
#MG
In [384]:
main.loc[main["Model"]=="MG"][["Gear Box"]]["Gear Box"].unique()
Out[384]:
array(['6-Speed', '6-speed', '6-speed CVT', '8-Speed', '8 Speed',
'6 Speed', '5 Speed', '8 Speed CVT', nan, '8-speed'], dtype=object)
In [385]:
com=main.loc[main["Model"]=="MG"]["Gear Box"].apply(preprocess_gear_box)
In [386]:
com.unique()
Out[386]:
array(['6 Speed', '6 Speed Cvt', '8 Speed', '5 Speed', '8 Speed Cvt',
'Nan'], dtype=object)
In [387]:
#Mini
In [388]:
main.loc[main["Model"]=="Mini"][["Gear Box"]]["Gear Box"].unique()
Out[388]:
array(['7 Speed', '8-speed Steptronic Transmission', nan,
'7-Speed DCT Steptronic'], dtype=object)
In [389]:
com=main.loc[main["Model"]=="Mini"]["Gear Box"].apply(preprocess_gear_box)
In [390]:
com.unique()
Out[390]:
array(['7 Speed', '8 Speed Steptronic', 'Nan', '7 Speed Dct Steptronic'],
dtype=object)
In [391]:
#Mitsubishi
In [392]:
main.loc[main["Model"]=="Mitsubishi"][["Gear Box"]]["Gear Box"].unique()
Out[392]:
array(['5 Speed', 'Six Speed Manual with Paddle Shifter', nan, '6 Speed',
'6'], dtype=object)
In [393]:
com=main.loc[main["Model"]=="Mitsubishi"]["Gear Box"].apply(preprocess_gear_box)
In [394]:
com.unique()
Out[394]:
array(['5 Speed', 'Six Speed Manual With Paddle Shifter', 'Nan',
'6 Speed'], dtype=object)
In [395]:
#Nissan
In [396]:
main.loc[main["Model"]=="Nissan"][["Gear Box"]]["Gear Box"].unique()
Out[396]:
array(['5 Speed', 'CVT', '6-Speed', '6 Speed', '5-Speed'], dtype=object)
In [397]:
com=main.loc[main["Model"]=="Nissan"]["Gear Box"].apply(preprocess_gear_box)
In [398]:
com.unique()
Out[398]:
array(['5 Speed', 'Cvt', '6 Speed'], dtype=object)
In [399]:
#Porsche
In [400]:
main.loc[main["Model"]=="Porsche"][["Gear Box"]]["Gear Box"].unique()
Out[400]:
array([nan, '8-Speed', '7-speed PDK', '2-speed transmission',
'8-speed Tiptronic S', '6 Speed', '8 SpeedPDK', '8 Speed',
'7 Speed', '8-speed Porsche Doppelkupplung',
'8-speed Tiptronic S with shift-by-wire'], dtype=object)
In [401]:
com=main.loc[main["Model"]=="Porsche"]["Gear Box"].apply(preprocess_gear_box)
In [402]:
com.unique()
Out[402]:
array(['Nan', '8 Speed', '7 Speed Pdk', '2 Speed', '8 Speed Tiptronic S',
'6 Speed', '8 Speedpdk', '7 Speed',
'8 Speed Porsche Doppelkupplung',
'8 Speed Tiptronic S With Shift By Wire'], dtype=object)
In [403]:
#Renault
In [404]:
main.loc[main["Model"]=="Renault"][["Gear Box"]]["Gear Box"].unique()
Out[404]:
array(['5-Speed', '5 Speed', 'CVT'], dtype=object)
In [405]:
com=main.loc[main["Model"]=="Renault"]["Gear Box"].apply(preprocess_gear_box)
In [406]:
com.unique()
Out[406]:
array(['5 Speed', 'Cvt'], dtype=object)
In [407]:
#Rolls-Royce
In [408]:
main.loc[main["Model"]=="Rolls-Royce"][["Gear Box"]]["Gear Box"].unique()
Out[408]:
array(['8 Speed', nan], dtype=object)
In [409]:
com=main.loc[main["Model"]=="Rolls-Royce"]["Gear Box"].apply(preprocess_gear_box)
In [410]:
com.unique()
Out[410]:
array(['8 Speed', 'Nan'], dtype=object)
In [411]:
#Skoda
In [412]:
main.loc[main["Model"]=="Skoda"][["Gear Box"]]["Gear Box"].unique()
Out[412]:
array(['6-Speed', '7-Speed DSG', '6-speed', '7-Speed', '7-speed',
'7-speed DSG'], dtype=object)
In [413]:
com=main.loc[main["Model"]=="Skoda"]["Gear Box"].apply(preprocess_gear_box)
In [414]:
com.unique()
Out[414]:
array(['6 Speed', '7 Speed Dsg', '7 Speed'], dtype=object)
In [415]:
#Tata
In [416]:
main.loc[main["Model"]=="Tata"][["Gear Box"]]["Gear Box"].unique()
Out[416]:
array(['6-Speed', '5-Speed', '5 Speed', '6-Speed DCT', 'Single speed',
nan], dtype=object)
In [417]:
com=main.loc[main["Model"]=="Tata"]["Gear Box"].apply(preprocess_gear_box)
In [418]:
com.unique()
Out[418]:
array(['6 Speed', '5 Speed', '6 Speed Dct', 'Single Speed', 'Nan'],
dtype=object)
In [419]:
#Toyota
In [420]:
main.loc[main["Model"]=="Toyota"][["Gear Box"]]["Gear Box"].unique()
Out[420]:
array(['6 Speed iMT', '6-Speed iMT', '6 Speed with Sequential Shift',
'6 Speed', '5-Speed', nan, '6-Speed', '5 Speed'], dtype=object)
In [421]:
com=main.loc[main["Model"]=="Toyota"]["Gear Box"].apply(preprocess_gear_box)
In [422]:
com.unique()
Out[422]:
array(['6 Speed Imt', '6 Speed With Sequential Shift', '6 Speed',
'5 Speed', 'Nan'], dtype=object)
In [423]:
#Volkswagen
In [424]:
main.loc[main["Model"]=="Volkswagen"][["Gear Box"]]["Gear Box"].unique()
Out[424]:
array(['6-Speed', '6 Speed', '7-Speed DSG', '7 Speed DCT'], dtype=object)
In [425]:
com=main.loc[main["Model"]=="Volkswagen"]["Gear Box"].apply(preprocess_gear_box)
In [426]:
com.unique()
Out[426]:
array(['6 Speed', '7 Speed Dsg', '7 Speed Dct'], dtype=object)
In [427]:
#Volvo
In [428]:
main.loc[main["Model"]=="Volvo"][["Gear Box"]]["Gear Box"].unique()
Out[428]:
array(['8 Speed', '8-speed', nan, '8Speed', 'single speed transmission'],
dtype=object)
In [429]:
com=main.loc[main["Model"]=="Volvo"]["Gear Box"].apply(preprocess_gear_box)
In [430]:
com.unique()
Out[430]:
array(['8 Speed', 'Nan', 'Single Speed'], dtype=object)
In [431]:
#Apply this function to all brands
main["Gear Box"]=main["Gear Box"].apply(preprocess_gear_box)
In [432]:
main["Gear Box"].unique()
Out[432]:
array(['5 Speed', '4 Speed', '6 Speed', 'Ags', 'Nan', '1 0 Speed',
'7 Speed Stronic', '7 Speed Dct', '7 Speed', '8 Speed',
'8 Speed Tiptronic', '8 Speed Steptronic', '7 Speed Steptronic',
'8 Speed Steptronic Sport', 'Single Speed', '8 Speed M Steptronic',
'8 Speed Dct', 'Cvt', '6 Speed Ivt', 'Imt', '6 Speed Imt', 'Ivt',
'9 Speed', '7 Speed Dual Clutch', '7 Speed Ldf Dct', 'Ecvt',
'6 Speed Autoshift', 'Mercedes Benz 7 Speed', 'Fully',
'Amg Speedshift 9G Tct', '9G Tronic', 'Amg 8 Speed Dct',
'Singlespeed', 'Mct 9 Speed', 'Amg Mct 9G Sport',
'Amg 7 Speed Dct', 'Amg Tct 9G', 'Amg Speedshift Dct 8G',
'Speedshift Tct 9G', 'Speedshift Tct 9 Speed', '9 Speed Tronic',
'7 Speed Dct Dual Clutch', '8G Dct', 'Amg Speedshift Mct 9G',
'7 Speed 9G Tronic', '7G Dct', '6 Speed Cvt', '8 Speed Cvt',
'7 Speed Dct Steptronic', 'Six Speed Manual With Paddle Shifter',
'7 Speed Pdk', '2 Speed', '8 Speed Tiptronic S', '8 Speedpdk',
'8 Speed Porsche Doppelkupplung',
'8 Speed Tiptronic S With Shift By Wire', '7 Speed Dsg',
'6 Speed Dct', '6 Speed With Sequential Shift'], dtype=object)
In [433]:
main.columns[31]
Out[433]:
'Report Incorrect Specs'
In [434]:
main.drop(["Report Incorrect Specs"],axis=1,inplace=True)
In [435]:
main.columns[32]
Out[435]:
'Petrol Fuel Tank Capacity (Litres)'
In [436]:
main["Petrol Fuel Tank Capacity (Litres)"]
Out[436]:
0 37.0
1 40.0
2 48.0
3 37.0
4 27.0
...
160026 NaN
160027 NaN
160028 60.0
160029 NaN
160030 NaN
Name: Petrol Fuel Tank Capacity (Litres), Length: 160031, dtype: float64
In [437]:
main.columns[33]
Out[437]:
'Emission Norm Compliance'
In [438]:
main["Emission Norm Compliance"]
Out[438]:
0 BS VI
1 BS VI
2 BS VI
3 BS VI
4 BS VI
...
160026 ZEV
160027 NaN
160028 BS VI
160029 NaN
160030 NaN
Name: Emission Norm Compliance, Length: 160031, dtype: object
In [439]:
main["Emission Norm Compliance"].unique()
Out[439]:
array(['BS VI', nan, 'BS IV', 'Bharat Stage III', 'BSIV',
'Bharat Stage IV', 'ZEV', 'Euro VI', 'BS III', 'Euro IV', 'SOHC'],
dtype=object)
In [440]:
def preprocess_emission_norm_compliance(text):
text=str(text)
if text=="nan":
return np.nan
if "Bharat Stage" in text:
text=text.replace("Bharat Stage","BS")
if "IV" in text:
if text[text.find("IV")-1]!=" ":
text=text.replace("IV"," IV")
return text
In [441]:
main["Emission Norm Compliance"]=main["Emission Norm Compliance"].apply(preprocess_emission_norm_compliance)
In [442]:
main.columns[34]
Out[442]:
'Front Suspension'
In [443]:
main["Front Suspension"]
Out[443]:
0 Mac Pherson Strut
1 Macpherson Strut
2 McPherson Strut with coil spring
3 Mac Pherson Strut
4 MacPherson Strut with Coil Spring
...
160026 sophisticated Suspension
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: Front Suspension, Length: 160031, dtype: object
In [444]:
main["Front Suspension"].unique()
Out[444]:
array(['Mac Pherson Strut', 'Macpherson Strut',
'McPherson Strut with coil spring',
'MacPherson Strut with Coil Spring',
'Mac Pherson Strut with Coil Spring', 'McPherson',
'McPherson Strut', 'Mac Pherson Strut & Coil Spring',
'MacPherson Strut', 'MacPherson Strut with Coil Spring',
'Mac Pherson Strut & Coil', nan,
'Independent MacPherson Strut with coil spring and anti-roll bar',
'Independent McPherson Strut with Dual Path Mounts',
'Independent MacPherson struts', 'Independent MacPherson Strut',
'Independent Mcpherson',
'Independent Coil Spring With Anti-Roll Bar',
'Independent McPherson Strut with Coil Spring',
'Independent McPherson Strut with Coil spring & Anti-roll bar',
'Independent McPherson strut with coil spring',
'Double Ball Joint MacPherson Strut with Stabilizer Bar',
'Independent McPherson struts with offset coil spring',
'Independent McPherson',
'Independent McPherson struts with offset coil spring & stabiliser bar',
'Independent Double Wishbone With Torsion Bar Spring & Stabilizer Bar',
'Independent Coil Spring with Anti-roll Bar',
'Independent McPherson Strut With Coil Spring & Anti-Roll Bar',
'Double Wishbone',
'Independent double wishbone with torsion bar spring & stabilizer bar',
'Independent McPherson Strut with Coil Spring & Anti-Roll Bar',
'Air Suspension', 'underbody guard with heavy-duty',
'Five-link front suspension; tubular anti-roll bar', 'RS Sports',
'Five-link front axle; tubular anti-roll bar; air spring suspension',
'Sport Adaptive Air Suspension', 'Adaptive Air Suspension',
'RS Sports Suspension plus with DRC', 'S Sports suspension',
'Four link Double Wishbones', 'Adaptive 2-axle Air Suspension',
'Adaptive M-specific Suspension', 'Adaptive M suspension',
'Double Joint Spring Strut',
'Single-joint spring strut axle in lightweight aluminium-steel construction',
'M Sport Suspension', 'independent damping', 'AIRMATIC suspension',
'Adaptive air suspension',
'Adaptive Suspension with variable shock absorber',
'Adaptive Suspension', 'Adaptive M-Specific Suspension',
'Adaptive M Suspension', 'Air-Suspension',
'Independent Double wishbones',
'Independent double wishbone with coil spring',
'independent,double wishbones(Adaptive Dampers)',
'Magnetorheological damper', 'adaptive magnetic suspension',
'MacPherson Strut,Coil Spring', 'McPherson Strut with Coil Spring',
'MacPherson Strut, Coil Spring', 'McPherson Strut, Coil Spring',
'Mcpherson Strut Coil Spring', 'McPherson strut with coil spring',
'McPherson strut', 'Mcpherson Strut',
'Independent Double wishbone coil springs gas shock absorbers stabiliser bar',
'Independent Double Wishbone,Coil Spring',
'Double Wishbone, Coil Spring',
'Independent Double Wishbone, Coil Spring',
'McPherson Strut with Lower Control Arm',
'Mcpherson Strut with Frequency Selective Damping, HRS with Anti Roll Bar',
'Independent double wishbone',
'Mcpherson Strut with Lower Control Arm',
'Mcpherson strut with coil spring',
'Macpherson Strut with coil spring', 'McPherson suspension',
'The Magneto Rheological Suspension',
'Aluminum double-wishbone suspension',
'Push rod magneto-rheologic active with horizontal dampers',
'adaptive air suspension', 'Electronic Air Suspension',
'Double wishbones Coil Suspension', 'Multi-link type,coil springs',
'AIR Adaptive Variable Suspension', 'MacPherson Struts',
'MacPherson struts', 'Double Wishbone with Stabilizer',
'MacPherson Strut with anti-roll bar',
'Double Wishbone Suspension with Coil over Shocks with FDD & MTV-CL',
'Double Wishbone with Coil Spring', 'IFS Coil Spring',
'McPherson Strut Independent Suspension with FSD and Stabilizer bar',
'Independent McPherson Strut with Dual Path Mounts, Coil Spring',
'Independent Double Wishbone Front Suspension with Coil Over Damper & Stabiliser Bar',
'MacPherson Strut with anti-roll bar`',
'Rigid axle with leaf spring',
'MacPherson Type with Wishbone Link',
'Double Wish-bone Type, Independent Front Coil Spring',
'independent double wishbone design coil springs, anti-roll bar and adaptive damping',
'Independent double wishbone, coil springs, anti-roll bar and adaptive dampers',
'Air Adaptive Suspensions',
'Quattroporte Sport GT S is fitted with the single-setting racing-style suspension system',
'AIRMATIC', 'AMG Suspension', 'AMG RIDE CONTROL sports suspension',
'Adaptive Damping System', 'AMG RIDE CONTROL+',
'four-link axle suspension',
'SUSPENSION WITH ADAPTIVE DAMPING SYSTEM', 'ADAPTIVE DAMPING',
'AMG RIDE CONTROL suspension',
'active roll stabilization intelligent suspension',
'Agility control', 'AMG RIDE CONTROL',
'DYNAMIC BODY CONTROL suspension', 'Independent Suspension',
'Rigid Leaf Spring', 'adaptive damping Suspension',
'Macpherson Strut with Stabilizer bar',
'Mcpherson Strut with Stablizer bar',
'Dual Helix Independent Suspension',
'single joint spring-strut front axle', 'Sport Suspension',
'MacPherson Struct', 'Single-Link Spring-Strut',
'McPherson Strut with coil spring & stabilizer bar',
'MacPherson Coil Springs with Stablizer Bar',
'Double wishbone torsion bar with stabiliser bar',
'Independent, Double wishbone coil springs with stabilizer bar',
'McPherson Strut with coil spring & Stabilizer bar',
'McPherson Strut With Bilstein Shock Absorbers,Eibach Springs & Stabilizer Bar',
'Independent Double Wishbone Coil Spring With Stabilizer Bar',
'McPherson Strut & Coil Spring',
'MacPherson strut with coil springs and stabilizer bar',
'Double Wishbone With Coil Spring Suspension',
'Mac Pherson strut with Lower Transverse link',
'Aluminum double-wishbone, independent wheel suspension',
'McPherson spring-strut', 'Double wishbone', 'Active Suspension',
'spring-strut suspension', 'Aluminium double-wishbone front axle',
'Lightweight spring-strut suspension',
'Adaptive air suspension including Porsche Active Suspension',
'Macpherson strut with lower triangle & coil spring',
'Mac Pherson strut with lower Transverse link',
'Double wishbone front axle',
'McPherson suspension with lower triangular links and stabiliser bar',
'McPherson suspension with lower triangular links and torsion stabiliser',
'MacPherson suspension',
'Independent Lower Wishbone McPherson Strut with Coil Spring & Anti Roll Bar',
'Independent Lower Wishbone McPherson Strut with Coil Spring',
'Independent Lower Wishbone McPherson Dual Path Strut',
'Independent, Lower Wishbone, Mcpherson Strut With Coil Spring',
'Independent MacPherson dual path strut with coil spring',
'Independent, Lower wishbone, McPherson Strut with coil spring',
'Independent MacPherson strut with coil spring',
'Independent MacPherson Dual Path Strut with Coil Spring',
'Semi-elliptical leaf springs-6leaves',
'Independent, Lower Wishbone, McPherson Strut with Coil Spring',
'Double Wishbone With Torsion Bar',
'McPherson suspension and stabiliser bar',
'McPherson strut with stabilizer bar',
'Mc-Pherson suspension and stabiliser bar',
'Independent suspension with coil spring', 'Air',
'sophisticated Suspension'], dtype=object)
In [20]:
def preprocess_Front_Suspension(text):
text=str(text)
if text=="nan":
return np.nan
#Two exceptional case occcured in Volkswagen
if "Mc-Pherson suspension and stabiliser bar" in text or "McPherson suspension and stabiliser bar" in text:
return "Mac Pherson Strut With Stabilizer Bar Front Suspension"
text=text.title()
if text.find("c")!=-1 and (text.find("P")!=-1 or text.find("p")!=-1):
try:
checking=text.find("a")
if checking==-1:
raise Exception("a Not Present")
if checking>21:
raise Exception("a is Present in outofbound")
if "Pherson" in text or "pherson" in text:
text=text[:text.find("c")+1]+" "+text[text.find("c")+1:]
except:
if "Pherson" in text or "pherson" in text:
text=text[:text.find("c")]+"a"+text[text.find("c"):]
text=text[:text.find("c")+1]+" "+text[text.find("c")+1:]
else:
if "Independent" in text and "Coil" in text and "Bar" in text and "Double" not in text:
text=text[:12]+" Mac Pherson "+text[12:]
text=text.title()
if "Pherson" in text:
if "Strut" not in text and "Suspension" in text:
text=text.replace("Suspension","")
if "," in text:
text=text.replace(","," ")
if " " in text:
text=text.replace(" "," ")
if text[0]==" ":
text=text[1:]
text=text.title()
if ("Spring" not in text and "Springs" not in text) and ("Pherson" in text):
if "Coil" in text:
text=text+" Spring"
try:
text=text.replace("With","&")
if "with" in text:
raise Exception("Own Exception")
except:
text=text.replace("with","&")
else:
if "Struct" in text:
text=text.replace("Struct","Strut")
if "Strut" not in text and "Link" not in text and "Stabiliser" not in text:
text=text+" Strut"
if "Struts" in text:
text=text.replace("Struts","Strut")
else:
if "Springs" in text:
text=text.replace("Springs","Spring")
if "Struts" in text:
text=text.replace("Struts","Strut")
if "Strut" not in text and "Double" not in text and "Link" not in text and "Ifs" not in text and "Suspension" not in text:
coil=text.find("Coil")
if coil!=-1:
text=text[:text.find("Coil")]+"Strut "+text[text.find("Coil"):]
if "And" in text:
text=text.replace("And","&")
try:
text=text.replace("With","&")
if "with" in text:
raise Exception("Own Exception")
except:
text=text.replace("with","&")
if " " in text:
text=text.replace(" "," ")
if "Coil" in text and "Strut" in text:
if text[text.find("Coil")-2]!="&":
text=text[:text.find("Coil")]+"& "+text[text.find("Coil"):]
text=text.title()
if "Front" not in text:
if "Suspension" not in text:
if "System" not in text:
text=text+" Front Suspension"
if "Adaptive" in text:
text=text+" System"
else:
find_=text.find("System")
text=text[:find_]+"Front Suspension "+text[find_:]
else:
find_=text.find("Suspension")
text=text[:find_]+"Front "+text[find_:]
if "System" not in text:
if "Adaptive" in text and "Damping" in text:
text=text+" System"
else:
text=text+" Suspension"
find_=text.find("Suspension")
find1_=text.rfind("Suspension")
if find_!=find1_:
text=text[:find1_-1]
if ";" in text:
text=text.replace(";"," With")
if "`" in text:
text=text.replace("`","")
if "Stablizer" in text:
text=text.replace("Stablizer","Stabilizer")
return text
In [446]:
#Maruti
In [447]:
main.loc[main["Model"]=="Maruti"][["Front Suspension"]]["Front Suspension"].unique()
Out[447]:
array(['Mac Pherson Strut', 'Macpherson Strut',
'McPherson Strut with coil spring',
'MacPherson Strut with Coil Spring',
'Mac Pherson Strut with Coil Spring', 'McPherson',
'McPherson Strut', 'Mac Pherson Strut & Coil Spring',
'MacPherson Strut', 'MacPherson Strut with Coil Spring',
'Mac Pherson Strut & Coil', nan], dtype=object)
In [448]:
com=main.loc[main["Model"]=="Maruti"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [449]:
com.unique()
Out[449]:
array(['Mac Pherson Strut Front Suspension',
'Mac Pherson Strut & Coil Spring Front Suspension', nan],
dtype=object)
In [450]:
#Ford
In [451]:
main.loc[main["Model"]=="Ford"][["Front Suspension"]]["Front Suspension"].unique()
Out[451]:
array(['Independent MacPherson Strut with coil spring and anti-roll bar',
'Independent McPherson Strut with Dual Path Mounts',
'McPherson Strut', 'Independent MacPherson struts',
'Independent MacPherson Strut', 'Independent Mcpherson',
'Independent Coil Spring With Anti-Roll Bar',
'Independent McPherson Strut with Coil Spring',
'Independent McPherson Strut with Coil spring & Anti-roll bar',
'Independent McPherson strut with coil spring', nan,
'Double Ball Joint MacPherson Strut with Stabilizer Bar',
'Independent McPherson struts with offset coil spring',
'Independent McPherson',
'Independent McPherson struts with offset coil spring & stabiliser bar',
'Independent Double Wishbone With Torsion Bar Spring & Stabilizer Bar',
'MacPherson Strut', 'Independent Coil Spring with Anti-roll Bar',
'Independent McPherson Strut With Coil Spring & Anti-Roll Bar',
'Double Wishbone',
'Independent double wishbone with torsion bar spring & stabilizer bar',
'Independent McPherson Strut with Coil Spring & Anti-Roll Bar'],
dtype=object)
In [452]:
com=main.loc[main["Model"]=="Ford"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [453]:
com.unique()
Out[453]:
array(['Independent Mac Pherson Strut & Coil Spring & Anti-Roll Bar Front Suspension',
'Independent Mac Pherson Strut With Dual Path Mounts Front Suspension',
'Mac Pherson Strut Front Suspension',
'Independent Mac Pherson Strut Front Suspension',
'Independent Mac Pherson Strut & Coil Spring Front Suspension',
nan,
'Double Ball Joint Mac Pherson Strut With Stabilizer Bar Front Suspension',
'Independent Mac Pherson Strut & Offset & Coil Spring Front Suspension',
'Independent Mac Pherson Strut & Offset & Coil Spring & Stabiliser Bar Front Suspension',
'Independent Double Wishbone & Torsion Bar Spring & Stabilizer Bar Front Suspension',
'Double Wishbone Front Suspension'], dtype=object)
In [454]:
#Audi
In [455]:
main.loc[main["Model"]=="Audi"][["Front Suspension"]]["Front Suspension"].unique()
Out[455]:
array(['Air Suspension', 'underbody guard with heavy-duty', nan,
'Five-link front suspension; tubular anti-roll bar', 'RS Sports',
'Five-link front axle; tubular anti-roll bar; air spring suspension',
'Sport Adaptive Air Suspension', 'Adaptive Air Suspension',
'RS Sports Suspension plus with DRC', 'S Sports suspension'],
dtype=object)
In [456]:
com=main.loc[main["Model"]=="Audi"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [457]:
com.unique()
Out[457]:
array(['Air Front Suspension',
'Underbody Guard & Heavy-Duty Front Suspension', nan,
'Five-Link Front Suspension With Tubular Anti-Roll Bar',
'Rs Sports Front Suspension',
'Five-Link Front Axle With Tubular Anti-Roll Bar With Air Spring Suspension',
'Sport Adaptive Air Front Suspension',
'Adaptive Air Front Suspension',
'Rs Sports Front Suspension Plus & Drc',
'S Sports Front Suspension'], dtype=object)
In [458]:
#Bentley
In [459]:
main.loc[main["Model"]=="Bentley"][["Front Suspension"]]["Front Suspension"].unique()
Out[459]:
array(['Air Suspension', 'Four link Double Wishbones'], dtype=object)
In [460]:
com=main.loc[main["Model"]=="Bentley"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [461]:
com.unique()
Out[461]:
array(['Air Front Suspension',
'Four Link Double Wishbones Front Suspension'], dtype=object)
In [462]:
#Force
In [463]:
main.loc[main["Model"]=="Force"][["Front Suspension"]]["Front Suspension"].unique()
Out[463]:
array(['Independent double wishbone with coil spring'], dtype=object)
In [464]:
com=main.loc[main["Model"]=="Force"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [465]:
com.unique()
Out[465]:
array(['Independent Double Wishbone & Coil Spring Front Suspension'],
dtype=object)
In [466]:
#Ferrari
In [467]:
main.loc[main["Model"]=="Ferrari"][["Front Suspension"]]["Front Suspension"].unique()
Out[467]:
array([nan, 'independent,double wishbones(Adaptive Dampers)',
'Magnetorheological damper', 'adaptive magnetic suspension'],
dtype=object)
In [468]:
com=main.loc[main["Model"]=="Ferrari"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [469]:
com.unique()
Out[469]:
array([nan,
'Independent Double Wishbones(Adaptive Dampers) Front Suspension System',
'Magnetorheological Damper Front Suspension',
'Adaptive Magnetic Front Suspension'], dtype=object)
In [470]:
#Honda
In [471]:
main.loc[main["Model"]=="Honda"][["Front Suspension"]]["Front Suspension"].unique()
Out[471]:
array(['MacPherson Strut,Coil Spring', 'McPherson Strut with Coil Spring',
'MacPherson Strut, Coil Spring', 'McPherson Strut, Coil Spring',
'McPherson Strut with coil spring', 'Mcpherson Strut Coil Spring'],
dtype=object)
In [472]:
com=main.loc[main["Model"]=="Honda"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [473]:
com.unique()
Out[473]:
array(['Mac Pherson Strut & Coil Spring Front Suspension'], dtype=object)
In [474]:
#Hyundai
In [475]:
main.loc[main["Model"]=="Hyundai"][["Front Suspension"]]["Front Suspension"].unique()
Out[475]:
array(['McPherson strut with coil spring', 'McPherson strut',
'Mcpherson Strut', 'McPherson Strut'], dtype=object)
In [476]:
com=main.loc[main["Model"]=="Hyundai"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [477]:
com.unique()
Out[477]:
array(['Mac Pherson Strut & Coil Spring Front Suspension',
'Mac Pherson Strut Front Suspension'], dtype=object)
In [478]:
#Isuzu
In [479]:
main.loc[main["Model"]=="Isuzu"][["Front Suspension"]]["Front Suspension"].unique()
Out[479]:
array(['Independent Double wishbone coil springs gas shock absorbers stabiliser bar',
'Independent Double Wishbone,Coil Spring',
'Double Wishbone, Coil Spring',
'Independent Double Wishbone, Coil Spring'], dtype=object)
In [480]:
com=main.loc[main["Model"]=="Isuzu"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [481]:
com.unique()
Out[481]:
array(['Independent Double Wishbone Coil Spring Gas Shock Absorbers Stabiliser Bar Front Suspension',
'Independent Double Wishbone Coil Spring Front Suspension',
'Double Wishbone Coil Spring Front Suspension'], dtype=object)
In [482]:
#Jeep
In [483]:
main.loc[main["Model"]=="Jeep"][["Front Suspension"]]["Front Suspension"].unique()
Out[483]:
array(['McPherson Strut with Lower Control Arm',
'Mcpherson Strut with Frequency Selective Damping, HRS with Anti Roll Bar',
'Independent double wishbone', nan,
'Mcpherson Strut with Lower Control Arm'], dtype=object)
In [484]:
com=main.loc[main["Model"]=="Jeep"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [485]:
com.unique()
Out[485]:
array(['Mac Pherson Strut With Lower Control Arm Front Suspension',
'Mac Pherson Strut With Frequency Selective Damping Hrs With Anti Roll Bar Front Suspension',
'Independent Double Wishbone Front Suspension', nan], dtype=object)
In [486]:
#Kia
In [487]:
main.loc[main["Model"]=="Kia"][["Front Suspension"]]["Front Suspension"].unique()
Out[487]:
array(['Mcpherson strut with coil spring',
'McPherson Strut with Coil Spring',
'McPherson Strut with coil spring', 'McPherson Strut',
'Macpherson Strut with coil spring', 'McPherson suspension'],
dtype=object)
In [488]:
com=main.loc[main["Model"]=="Kia"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [489]:
com.unique()
Out[489]:
array(['Mac Pherson Strut & Coil Spring Front Suspension',
'Mac Pherson Strut Front Suspension'], dtype=object)
In [490]:
#Lamborghini
In [491]:
main.loc[main["Model"]=="Lamborghini"][["Front Suspension"]]["Front Suspension"].unique()
Out[491]:
array(['The Magneto Rheological Suspension',
'Aluminum double-wishbone suspension',
'Push rod magneto-rheologic active with horizontal dampers',
'adaptive air suspension'], dtype=object)
In [492]:
com=main.loc[main["Model"]=="Lamborghini"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [493]:
com.unique()
Out[493]:
array(['The Magneto Rheological Front Suspension',
'Aluminum Double-Wishbone Front Suspension',
'Push Rod Magneto-Rheologic Active & Horizontal Dampers Front Suspension',
'Adaptive Air Front Suspension'], dtype=object)
In [494]:
#Land_Rover
In [495]:
main.loc[main["Model"]=="Land_Rover"][["Front Suspension"]]["Front Suspension"].unique()
Out[495]:
array(['Electronic Air Suspension', 'Double wishbones Coil Suspension',
nan, 'MacPherson Strut'], dtype=object)
In [496]:
com=main.loc[main["Model"]=="Land_Rover"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [497]:
com.unique()
Out[497]:
array(['Electronic Air Front Suspension',
'Double Wishbones Coil Front Suspension', nan,
'Mac Pherson Strut Front Suspension'], dtype=object)
In [498]:
#Lexus
In [499]:
main.loc[main["Model"]=="Lexus"][["Front Suspension"]]["Front Suspension"].unique()
Out[499]:
array(['MacPherson Strut', 'Multi-link type,coil springs',
'AIR Adaptive Variable Suspension', 'MacPherson Struts',
'MacPherson struts', 'Double Wishbone with Stabilizer'],
dtype=object)
In [500]:
com=main.loc[main["Model"]=="Lexus"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [501]:
com.unique()
Out[501]:
array(['Mac Pherson Strut Front Suspension',
'Multi-Link Type Coil Spring Front Suspension',
'Air Adaptive Variable Front Suspension',
'Double Wishbone & Stabilizer Front Suspension'], dtype=object)
In [502]:
#Mahindra
In [503]:
main.loc[main["Model"]=="Mahindra"][["Front Suspension"]]["Front Suspension"].unique()
Out[503]:
array(['MacPherson Strut with anti-roll bar',
'Double Wishbone Suspension with Coil over Shocks with FDD & MTV-CL',
'Double Wishbone with Coil Spring', 'IFS Coil Spring',
'McPherson Strut Independent Suspension with FSD and Stabilizer bar',
'Independent McPherson Strut with Dual Path Mounts, Coil Spring',
'Independent Double Wishbone Front Suspension with Coil Over Damper & Stabiliser Bar',
nan, 'MacPherson Strut with anti-roll bar`',
'Rigid axle with leaf spring',
'MacPherson Type with Wishbone Link',
'Double Wish-bone Type, Independent Front Coil Spring',
'Double Wishbone'], dtype=object)
In [504]:
com=main.loc[main["Model"]=="Mahindra"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [505]:
com.unique()
Out[505]:
array(['Mac Pherson Strut With Anti-Roll Bar Front Suspension',
'Double Wishbone Front Suspension & Coil Over Shocks & Fdd & Mtv-Cl',
'Double Wishbone & Coil Spring Front Suspension',
'Ifs Coil Spring Front Suspension',
'Mac Pherson Strut Independent Front Suspension With Fsd And Stabilizer Bar',
'Independent Mac Pherson Strut & Dual Path Mounts & Coil Spring Front Suspension',
'Independent Double Wishbone Front Suspension & Coil Over Damper & Stabiliser Bar',
nan, 'Rigid Axle & Leaf Spring Front Suspension',
'Mac Pherson Type With Wishbone Link Front Suspension',
'Double Wish-Bone Type Independent Front Coil Spring Suspension',
'Double Wishbone Front Suspension'], dtype=object)
In [506]:
#Aston_Martin
In [507]:
main.loc[main["Model"]=="Aston_Martin"][["Front Suspension"]]["Front Suspension"].unique()
Out[507]:
array(['independent double wishbone design coil springs, anti-roll bar and adaptive damping',
'Independent double wishbone, coil springs, anti-roll bar and adaptive dampers',
'Independent double wishbone'], dtype=object)
In [508]:
com=main.loc[main["Model"]=="Aston_Martin"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [509]:
com.unique()
Out[509]:
array(['Independent Double Wishbone Design Coil Spring Anti-Roll Bar & Adaptive Damping Front Suspension System',
'Independent Double Wishbone Coil Spring Anti-Roll Bar & Adaptive Dampers Front Suspension System',
'Independent Double Wishbone Front Suspension'], dtype=object)
In [510]:
#Maserati
In [511]:
main.loc[main["Model"]=="Maserati"][["Front Suspension"]]["Front Suspension"].unique()
Out[511]:
array(['Air Adaptive Suspensions', 'Double Wishbone', nan,
'Quattroporte Sport GT S is fitted with the single-setting racing-style suspension system'],
dtype=object)
In [512]:
com=main.loc[main["Model"]=="Maserati"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [513]:
com.unique()
Out[513]:
array(['Air Adaptive Front Suspensions',
'Double Wishbone Front Suspension', nan,
'Quattroporte Sport Gt S Is Fitted & The Single-Setting Racing-Style Front Suspension System'],
dtype=object)
In [514]:
#Mercedes-Benz
In [515]:
main.loc[main["Model"]=="Mercedes-Benz"][["Front Suspension"]]["Front Suspension"].unique()
Out[515]:
array(['AIRMATIC', 'Air Suspension', 'AMG Suspension', nan,
'AMG RIDE CONTROL sports suspension', 'Adaptive Damping System',
'AMG RIDE CONTROL+', 'four-link axle suspension',
'SUSPENSION WITH ADAPTIVE DAMPING SYSTEM', 'ADAPTIVE DAMPING',
'AMG RIDE CONTROL suspension', 'AIRMATIC suspension',
'active roll stabilization intelligent suspension',
'Agility control', 'Adaptive Air Suspension', 'AMG RIDE CONTROL',
'DYNAMIC BODY CONTROL suspension', 'Independent Suspension',
'Rigid Leaf Spring', 'adaptive damping Suspension'], dtype=object)
In [516]:
com=main.loc[main["Model"]=="Mercedes-Benz"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [517]:
com.unique()
Out[517]:
array(['Airmatic Front Suspension', 'Air Front Suspension',
'Amg Front Suspension', nan,
'Amg Ride Control Sports Front Suspension',
'Adaptive Damping Front Suspension System',
'Amg Ride Control+ Front Suspension',
'Four-Link Axle Front Suspension',
'Front Suspension & Adaptive Damping System',
'Amg Ride Control Front Suspension',
'Active Roll Stabilization Intelligent Front Suspension',
'Agility Control Front Suspension',
'Adaptive Air Front Suspension',
'Dynamic Body Control Front Suspension',
'Independent Front Suspension',
'Rigid Leaf Spring Front Suspension'], dtype=object)
In [518]:
#MG
In [21]:
main.loc[main["Model"]=="MG"][["Front Suspension"]]["Front Suspension"].unique()
Out[21]:
array(['Macpherson Strut with Stabilizer bar',
'Mcpherson Strut with Stablizer bar',
'Dual Helix Independent Suspension', 'MacPherson Strut'],
dtype=object)
In [22]:
com=main.loc[main["Model"]=="MG"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [23]:
com.unique()
Out[23]:
array(['Mac Pherson Strut With Stabilizer Bar Front Suspension',
'Dual Helix Independent Front Suspension',
'Mac Pherson Strut Front Suspension'], dtype=object)
In [24]:
#Mini
In [25]:
main.loc[main["Model"]=="Mini"][["Front Suspension"]]["Front Suspension"].unique()
Out[25]:
array(['single joint spring-strut front axle', 'Sport Suspension',
'MacPherson Struct', nan, 'Single-Link Spring-Strut'], dtype=object)
In [26]:
com=main.loc[main["Model"]=="Mini"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [27]:
com.unique()
Out[27]:
array(['Single Joint Spring-Strut Front Axle Suspension',
'Sport Front Suspension', 'Mac Pherson Strut Front Suspension',
nan, 'Single-Link Spring-Strut Front Suspension'], dtype=object)
In [28]:
#Mitsubishi
In [29]:
main.loc[main["Model"]=="Mitsubishi"][["Front Suspension"]]["Front Suspension"].unique()
Out[29]:
array(['McPherson Strut with coil spring & stabilizer bar',
'MacPherson Coil Springs with Stablizer Bar',
'Double wishbone torsion bar with stabiliser bar',
'Independent, Double wishbone coil springs with stabilizer bar',
'McPherson Strut with coil spring & Stabilizer bar', nan,
'McPherson Strut With Bilstein Shock Absorbers,Eibach Springs & Stabilizer Bar',
'Independent Double Wishbone Coil Spring With Stabilizer Bar',
'McPherson Strut & Coil Spring', 'Double Wishbone',
'MacPherson strut with coil springs and stabilizer bar',
'Double Wishbone With Coil Spring Suspension'], dtype=object)
In [30]:
com=main.loc[main["Model"]=="Mitsubishi"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [31]:
com.unique()
Out[31]:
array(['Mac Pherson Strut & Coil Spring & Stabilizer Bar Front Suspension',
'Double Wishbone Torsion Bar & Stabiliser Bar Front Suspension',
'Independent Double Wishbone Coil Spring & Stabilizer Bar Front Suspension',
nan,
'Mac Pherson Strut & Bilstein Shock Absorbers Eibach Spring & Stabilizer Bar Front Suspension',
'Mac Pherson Strut & Coil Spring Front Suspension',
'Double Wishbone Front Suspension',
'Double Wishbone & Coil Spring Front Suspension'], dtype=object)
In [32]:
#Nissan
In [33]:
main.loc[main["Model"]=="Nissan"][["Front Suspension"]]["Front Suspension"].unique()
Out[33]:
array(['Mac Pherson strut with Lower Transverse link',
'McPherson Strut with Coil Spring', 'Double Wishbone'],
dtype=object)
In [34]:
com=main.loc[main["Model"]=="Nissan"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [35]:
com.unique()
Out[35]:
array(['Mac Pherson Strut With Lower Transverse Link Front Suspension',
'Mac Pherson Strut & Coil Spring Front Suspension',
'Double Wishbone Front Suspension'], dtype=object)
In [36]:
#Porsche
In [37]:
main.loc[main["Model"]=="Porsche"][["Front Suspension"]]["Front Suspension"].unique()
Out[37]:
array(['Aluminum double-wishbone, independent wheel suspension',
'McPherson spring-strut', 'Double wishbone',
'Adaptive air suspension', 'Active Suspension',
'spring-strut suspension', 'Aluminium double-wishbone front axle',
'Double Wishbone', 'Lightweight spring-strut suspension', nan,
'Adaptive air suspension including Porsche Active Suspension'],
dtype=object)
In [38]:
com=main.loc[main["Model"]=="Porsche"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [39]:
com.unique()
Out[39]:
array(['Aluminum Double-Wishbone Independent Wheel Front Suspension',
'Mac Pherson Spring-Strut Front Suspension',
'Double Wishbone Front Suspension',
'Adaptive Air Front Suspension', 'Active Front Suspension',
'Spring-Strut Front Suspension',
'Aluminium Double-Wishbone Front Axle Suspension',
'Lightweight Spring-Strut Front Suspension', nan,
'Adaptive Air Front Suspension Including Porsche Active'],
dtype=object)
In [40]:
#Renault
In [41]:
main.loc[main["Model"]=="Renault"][["Front Suspension"]]["Front Suspension"].unique()
Out[41]:
array(['Mac Pherson strut with Lower Transverse link',
'Macpherson strut with lower triangle & coil spring',
'Mac Pherson strut with lower Transverse link'], dtype=object)
In [42]:
com=main.loc[main["Model"]=="Renault"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [43]:
com.unique()
Out[43]:
array(['Mac Pherson Strut With Lower Transverse Link Front Suspension',
'Mac Pherson Strut & Lower Triangle & Coil Spring Front Suspension'],
dtype=object)
In [44]:
#Rolls-Royce
In [45]:
main.loc[main["Model"]=="Rolls-Royce"][["Front Suspension"]]["Front Suspension"].unique()
Out[45]:
array(['Double Wishbone', nan, 'Double wishbone front axle'], dtype=object)
In [46]:
com=main.loc[main["Model"]=="Rolls-Royce"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [47]:
com.unique()
Out[47]:
array(['Double Wishbone Front Suspension', nan,
'Double Wishbone Front Axle Suspension'], dtype=object)
In [48]:
#Skoda
In [49]:
main.loc[main["Model"]=="Skoda"][["Front Suspension"]]["Front Suspension"].unique()
Out[49]:
array(['McPherson suspension with lower triangular links and stabiliser bar',
'McPherson suspension with lower triangular links and torsion stabiliser',
'MacPherson suspension'], dtype=object)
In [50]:
com=main.loc[main["Model"]=="Skoda"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [51]:
com.unique()
Out[51]:
array(['Mac Pherson With Lower Triangular Links And Stabiliser Bar Front Suspension',
'Mac Pherson With Lower Triangular Links And Torsion Stabiliser Front Suspension',
'Mac Pherson Strut Front Suspension'], dtype=object)
In [52]:
#Tata
In [53]:
main.loc[main["Model"]=="Tata"][["Front Suspension"]]["Front Suspension"].unique()
Out[53]:
array(['Independent Lower Wishbone McPherson Strut with Coil Spring & Anti Roll Bar',
'Independent Lower Wishbone McPherson Strut with Coil Spring',
'Independent Lower Wishbone McPherson Dual Path Strut',
'Independent, Lower Wishbone, Mcpherson Strut With Coil Spring',
'Independent MacPherson dual path strut with coil spring',
'Independent, Lower wishbone, McPherson Strut with coil spring',
'Independent MacPherson strut with coil spring',
'Independent MacPherson Dual Path Strut with Coil Spring',
'Semi-elliptical leaf springs-6leaves', nan,
'Independent, Lower Wishbone, McPherson Strut with Coil Spring'],
dtype=object)
In [54]:
com=main.loc[main["Model"]=="Tata"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [55]:
com.unique()
Out[55]:
array(['Independent Lower Wishbone Mac Pherson Strut & Coil Spring & Anti Roll Bar Front Suspension',
'Independent Lower Wishbone Mac Pherson Strut & Coil Spring Front Suspension',
'Independent Lower Wishbone Mac Pherson Dual Path Strut Front Suspension',
'Independent Mac Pherson Dual Path Strut & Coil Spring Front Suspension',
'Independent Mac Pherson Strut & Coil Spring Front Suspension',
'Semi-Elliptical Leaf Spring-6Leaves Front Suspension', nan],
dtype=object)
In [56]:
#Toyota
In [57]:
main.loc[main["Model"]=="Toyota"][["Front Suspension"]]["Front Suspension"].unique()
Out[57]:
array(['Double Wishbone', 'Double wishbone',
'Double Wishbone With Torsion Bar', 'MacPherson Strut', nan],
dtype=object)
In [58]:
com=main.loc[main["Model"]=="Toyota"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [59]:
com.unique()
Out[59]:
array(['Double Wishbone Front Suspension',
'Double Wishbone & Torsion Bar Front Suspension',
'Mac Pherson Strut Front Suspension', nan], dtype=object)
In [60]:
#Volkswagen
In [61]:
main.loc[main["Model"]=="Volkswagen"][["Front Suspension"]]["Front Suspension"].unique()
Out[61]:
array(['McPherson suspension and stabiliser bar',
'McPherson strut with stabilizer bar',
'Mc-Pherson suspension and stabiliser bar',
'Independent suspension with coil spring'], dtype=object)
In [62]:
com=main.loc[main["Model"]=="Volkswagen"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [63]:
com.unique()
Out[63]:
array(['Mac Pherson Strut With Stabilizer Bar Front Suspension',
'Independent Front Suspension & Coil Spring'], dtype=object)
In [64]:
#Volvo
In [65]:
main.loc[main["Model"]=="Volvo"][["Front Suspension"]]["Front Suspension"].unique()
Out[65]:
array(['Air', nan, 'MacPherson Strut', 'sophisticated Suspension'],
dtype=object)
In [66]:
com=main.loc[main["Model"]=="Volvo"]["Front Suspension"].apply(preprocess_Front_Suspension)
In [67]:
com.unique()
Out[67]:
array(['Air Front Suspension', nan, 'Mac Pherson Strut Front Suspension',
'Sophisticated Front Suspension'], dtype=object)
In [68]:
#Applt the preprocess_Front_Suspention to all the brands
main["Front Suspension"]=main["Front Suspension"].apply(preprocess_Front_Suspension)
In [69]:
#preprocessed Front Suspension
len(main["Front Suspension"].unique())
Out[69]:
107
In [70]:
#Unpreprocessed Front Suspension
len(main_data["Front Suspension"].unique())
Out[70]:
170
In [71]:
main["Front Suspension"].unique()
Out[71]:
array(['Mac Pherson Strut Front Suspension',
'Mac Pherson Strut & Coil Spring Front Suspension', nan,
'Independent Mac Pherson Strut & Coil Spring & Anti-Roll Bar Front Suspension',
'Independent Mac Pherson Strut With Dual Path Mounts Front Suspension',
'Independent Mac Pherson Strut Front Suspension',
'Independent Mac Pherson Strut & Coil Spring Front Suspension',
'Double Ball Joint Mac Pherson Strut With Stabilizer Bar Front Suspension',
'Independent Mac Pherson Strut & Offset & Coil Spring Front Suspension',
'Independent Mac Pherson Strut & Offset & Coil Spring & Stabiliser Bar Front Suspension',
'Independent Double Wishbone & Torsion Bar Spring & Stabilizer Bar Front Suspension',
'Double Wishbone Front Suspension', 'Air Front Suspension',
'Underbody Guard & Heavy-Duty Front Suspension',
'Five-Link Front Suspension With Tubular Anti-Roll Bar',
'Rs Sports Front Suspension',
'Five-Link Front Axle With Tubular Anti-Roll Bar With Air Spring Suspension',
'Sport Adaptive Air Front Suspension',
'Adaptive Air Front Suspension',
'Rs Sports Front Suspension Plus & Drc',
'S Sports Front Suspension',
'Four Link Double Wishbones Front Suspension',
'Adaptive 2-Axle Air Front Suspension',
'Adaptive M-Specific Front Suspension',
'Adaptive M Front Suspension',
'Double Joint Spring Strut Front Suspension',
'Single-Joint Spring Strut Axle In Lightweight Aluminium-Steel Construction Front Suspension',
'M Sport Front Suspension', 'Independent Damping Front Suspension',
'Airmatic Front Suspension',
'Adaptive Front Suspension & Variable Shock Absorber',
'Adaptive Front Suspension', 'Air-Front Suspension',
'Independent Double Wishbones Front Suspension',
'Independent Double Wishbone & Coil Spring Front Suspension',
'Independent Double Wishbones(Adaptive Dampers) Front Suspension System',
'Magnetorheological Damper Front Suspension',
'Adaptive Magnetic Front Suspension',
'Independent Double Wishbone Coil Spring Gas Shock Absorbers Stabiliser Bar Front Suspension',
'Independent Double Wishbone Coil Spring Front Suspension',
'Double Wishbone Coil Spring Front Suspension',
'Mac Pherson Strut With Lower Control Arm Front Suspension',
'Mac Pherson Strut With Frequency Selective Damping Hrs With Anti Roll Bar Front Suspension',
'Independent Double Wishbone Front Suspension',
'The Magneto Rheological Front Suspension',
'Aluminum Double-Wishbone Front Suspension',
'Push Rod Magneto-Rheologic Active & Horizontal Dampers Front Suspension',
'Electronic Air Front Suspension',
'Double Wishbones Coil Front Suspension',
'Multi-Link Type Coil Spring Front Suspension',
'Air Adaptive Variable Front Suspension',
'Double Wishbone & Stabilizer Front Suspension',
'Mac Pherson Strut With Anti-Roll Bar Front Suspension',
'Double Wishbone Front Suspension & Coil Over Shocks & Fdd & Mtv-Cl',
'Double Wishbone & Coil Spring Front Suspension',
'Ifs Coil Spring Front Suspension',
'Mac Pherson Strut Independent Front Suspension With Fsd And Stabilizer Bar',
'Independent Mac Pherson Strut & Dual Path Mounts & Coil Spring Front Suspension',
'Independent Double Wishbone Front Suspension & Coil Over Damper & Stabiliser Bar',
'Rigid Axle & Leaf Spring Front Suspension',
'Mac Pherson Type With Wishbone Link Front Suspension',
'Double Wish-Bone Type Independent Front Coil Spring Suspension',
'Independent Double Wishbone Design Coil Spring Anti-Roll Bar & Adaptive Damping Front Suspension System',
'Independent Double Wishbone Coil Spring Anti-Roll Bar & Adaptive Dampers Front Suspension System',
'Air Adaptive Front Suspensions',
'Quattroporte Sport Gt S Is Fitted & The Single-Setting Racing-Style Front Suspension System',
'Amg Front Suspension', 'Amg Ride Control Sports Front Suspension',
'Adaptive Damping Front Suspension System',
'Amg Ride Control+ Front Suspension',
'Four-Link Axle Front Suspension',
'Front Suspension & Adaptive Damping System',
'Amg Ride Control Front Suspension',
'Active Roll Stabilization Intelligent Front Suspension',
'Agility Control Front Suspension',
'Dynamic Body Control Front Suspension',
'Independent Front Suspension',
'Rigid Leaf Spring Front Suspension',
'Mac Pherson Strut With Stabilizer Bar Front Suspension',
'Dual Helix Independent Front Suspension',
'Single Joint Spring-Strut Front Axle Suspension',
'Sport Front Suspension',
'Single-Link Spring-Strut Front Suspension',
'Mac Pherson Strut & Coil Spring & Stabilizer Bar Front Suspension',
'Double Wishbone Torsion Bar & Stabiliser Bar Front Suspension',
'Independent Double Wishbone Coil Spring & Stabilizer Bar Front Suspension',
'Mac Pherson Strut & Bilstein Shock Absorbers Eibach Spring & Stabilizer Bar Front Suspension',
'Mac Pherson Strut With Lower Transverse Link Front Suspension',
'Aluminum Double-Wishbone Independent Wheel Front Suspension',
'Mac Pherson Spring-Strut Front Suspension',
'Active Front Suspension', 'Spring-Strut Front Suspension',
'Aluminium Double-Wishbone Front Axle Suspension',
'Lightweight Spring-Strut Front Suspension',
'Adaptive Air Front Suspension Including Porsche Active',
'Mac Pherson Strut & Lower Triangle & Coil Spring Front Suspension',
'Double Wishbone Front Axle Suspension',
'Mac Pherson With Lower Triangular Links And Stabiliser Bar Front Suspension',
'Mac Pherson With Lower Triangular Links And Torsion Stabiliser Front Suspension',
'Independent Lower Wishbone Mac Pherson Strut & Coil Spring & Anti Roll Bar Front Suspension',
'Independent Lower Wishbone Mac Pherson Strut & Coil Spring Front Suspension',
'Independent Lower Wishbone Mac Pherson Dual Path Strut Front Suspension',
'Independent Mac Pherson Dual Path Strut & Coil Spring Front Suspension',
'Semi-Elliptical Leaf Spring-6Leaves Front Suspension',
'Double Wishbone & Torsion Bar Front Suspension',
'Independent Front Suspension & Coil Spring',
'Sophisticated Front Suspension'], dtype=object)
In [73]:
main["Rear Suspension"]
Out[73]:
0 Torsion Beam
1 NaN
...
548 NaN
549 NaN
Name: Rear Suspension, Length: 160031, dtype: object
In [74]:
main["Rear Suspension"].unique()
Out[74]:
array(['Torsion Beam', nan, 'Torsion Beam & coil spring',
'Torsion Beam with Coil Spring', '3-Link Rigid Axle',
'Torsion Beam & Coil Spring', 'Leaf Spring Rigid Axle',
'Semi-independent twist beam with twin gas and oil filled shock absorbers',
'Semi Independent Twist Beam, Coil Springs',
'Semi Independent Twist Beam',
'Heavy duty twist-beam with strut-type coil spring/damper units',
'Semi-independent twist beam',
'Semi Independent (Twist Beam Type)',
'Coil Spring with Anti Roll Bar', 'Twist Beam',
'Semi-independent twist beam with twin shock absorbers filled with gas & oil',
'Semi-Independent Twist Beam', 'Semi Independent',
'Integral Link Independent with Coil Springs & Stabilizer Bar',
'Semi-Independent heavy duty twist-beam with coil springs',
'Progressive Linear Rate Leaf Springs With Low Friction Pads',
'Semi-Independent Twist Beam With Twin Shock Absorbers Filled With Gas & Oil',
'Coil Spring, Watts Linkage Type with Anti-roll Bar',
'Progessive Linear Rate Leaf Springs With Low Friction Pads',
'Semi-independent Twist Beam With Twin Shock Absorbers Filled With Gas & Oil',
'Leaf Spring',
'Progressive linear rate leaf springs with low friction pads',
'Semi-Independent Twist Beam With Twin Shock Absorbers filled with gas & oil',
'Air Suspension', '4-link',
'Five-link front suspension; tubular anti-roll bar', 'RS Sports',
'Five-link front axle; tubular anti-roll bar; air spring suspension',
'Sport Adaptive Air Suspension', 'Adaptive Air Suspension',
'RS Adaptive Air Suspension', 'S Sports suspension',
'Trapezoidak muliti-Link', 'Air suspension',
'Adaptive 2-axle Air Suspension', 'Adaptive M-specific Suspension',
'Adaptive M suspension', 'Five Arm', 'Dynamic Damper Control',
'M Sport Suspension', 'independent damping', 'AIRMATIC suspension',
'Adaptive air suspension',
'Adaptive Suspension with variable shock absorber',
'Adaptive Suspension', 'Adaptive M-Specific Suspension',
'Adaptive M Suspension', 'Air-Suspension',
'Independent Double wishbones',
'Multi-link with Pan hard rod & Coil Spring',
'independent,multi-link(Adaptive Dampers)',
'Magnetorheological damper', 'adaptive magnetic suspension',
'independent, multi-link(Adaptive Dampers)',
'Torsion Beam Axle,Coil Spring',
'Twisted Torsion Beam, Coil Spring', 'Torsion Bar, Coil Spring',
'Torsion beam with coil spring', 'Torsion Beam Axle, Coil Spring',
'Torsion Beam axle, Coil Spring', 'Coupled torsion beam axle',
'Coupled Torsion Beam Axle',
'Coupled torsion beam axle with coil spring',
'Multi-link with coil spring',
'Penta-link coil suspension gas shock absorbers stabiliser bar',
'Soft ride,Leaf Spring', 'Semi-Elliptic Leaf Spring',
'Soft Ride, Leaf Spring',
'Multi Link Suspension with Strut Assembly',
'Multi-Link with Strut Suspension with FSD, with Anti Roll Bar',
'heavy duty with gas shocks', 'Multi Link Suspension',
'Coupled Torsion Beam Axle with Coil Spring', 'Multi Link',
'Multi-Link', 'Aluminum double-wishbone suspension',
'Push rod magneto-rheologic active with horizontal dampers',
'adaptive air suspension', 'Multi-link',
'Electronic Air Suspension', 'Integral Coil Spring',
'Double-wishbone', 'multi-link suspension',
'AIR Adaptive Variable Suspension', 'Double Wishbone Suspension',
'Double Wishbone', '4-link Type with Coil Springs',
'Twist beam suspension with Coil Spring',
'Pentalink Suspension with WATT’s Linkage with FDD & MTV-CL',
'5 Link Rear Suspension with Coil Spring', 'Rigid leaf Spring',
'Multi-Link Independent Suspension with FSD Stabilizer bar',
'Semi-independent Twist Beam with Coil Spring',
'Multilink Solid Rear Axle with Coil Over Damper & Stabiliser Bar',
'Rigid axle with leaf spring',
'H-Section Torsion Beam with Coil Spring',
'Multi Link Coil Spring Suspension and Anti-roll Bar',
'multi-link, coil springs, anti-roll bar and adaptive damping Adaptive Damping System',
'Multi-link, coil springs, anti-roll bar and adaptive dampers Adaptive Damping System',
'Air Adaptive Suspensions', 'Five-Arm Multilink',
'Quattroporte Sport GT S is fitted with the single-setting racing-style suspension system',
'AIRMATIC', 'AMG Suspension', 'AMG RIDE CONTROL sports suspension',
'Adaptive Damping System', 'AMG RIDE CONTROL+',
'five-link multi-link independent suspension',
'SUSPENSION WITH ADAPTIVE DAMPING SYSTEM', 'ADAPTIVE DAMPING',
'AMG RIDE CONTROL suspension', 'air suspension',
'active roll stabilization intelligent suspension',
'Agility control', 'AMG RIDE CONTROL',
'DYNAMIC BODY CONTROL suspension', 'Coil spring',
'Rigid Leaf Spring', 'adaptive damping Suspension',
'Semi Independent Helical Spring Torison Beam',
'Semi Independent Helical Spring Torsion Beam',
'Five Link Integral Suspension', 'multiple control-arm rear axle',
'Sport Suspension', 'Multiple-Control-Arm',
'Independent Multi-link with Stabilizer bar',
'Multi-Link Coil Spring with Stablizer Bar',
'3 Link coil spring rigid axle with stabiliser bar',
'Multi-link coil springs with stabilizer bar',
'Independent Multi-link with stabilizer bar',
'Multi-Link With Bilstein Shock Absorbers,Eibach Springs & Stabilizer Bar',
'Multi-Link Coil Springs With Stabilizer', '3 Link',
'Multi link with coil springs and stabilizer bar',
'3 Link Coil Spring Suspension',
'Twin tube telescopic shock absorber',
'Torsion Beam with Coil Springs',
'Aluminum multi-link axle with subframe, independent wheel suspension',
'Active Suspension', 'spring-strut suspension',
'Aluminium multi-link rear axle', 'Self-Tracking Trapezoidal Link',
'Lightweight spring-strut suspension',
'Twist beam suspension with coil spring', 'Torsion beam axle',
'multi-link rear axle', 'Twist Beam Axle',
'Multi-element axle, with longitudinal and transverse links, with torsion stabiliser',
'Multilink suspension, one longitudinal and three transverse arms',
'Multi-element axle, with one longitudinal and transverse links, with torsion stabiliser',
'Semi Independent Twist Blade with Panhard Rod & Coil Spring',
'Semi-Independent closed profile Twist beam with Coil Spring and shock absorber',
'Twist Beam with Coil Spring',
'Semi-independent Twist Beam With Coil Spring And Shock Absorber',
'Twist beam with coil spring and shock absorber',
'Semi-independent; Rear Twist Beam with Dual path Strut',
'Semi Independent Twist Blade with Panhard Rod and Coil Spring',
'Twist beam with dual path Strut',
'Twist beam with dual path strut',
'Twist Beam with Coil Spring and Shock Absorber',
'Innovative Two-stage semi-elliptical leaf springs',
'Semi-independent Closed Profile Twist Beam with Dual Path Strut',
'Semi-Independent Closed Profile Twist Beam with Dual Path Strut',
'Innovative Two-stage Semi-elliptical leaf springs-7leaves',
'Rear Twist Beam with Coil Spring',
'Twist Beam with Coil Spring and Shock Absorberf',
'4-Link With Coil Spring', 'leaf spring',
'4-Link with Coil Spring', 'Twist beam axle',
'Semi Indpendent Trailing Arm', 'Twist beam axle`',
'Independent suspension by four-link axle', 'Air',
'sophisticated Suspension'], dtype=object)
In [75]:
def preprocess_Rear_Suspension(text):
text=str(text)
list_of_symbols_tonull_out=["Type","(",")","Front"]
if text=="nan":
return np.nan
text=text.title()
if "&" in text:
text=text.replace("&","With")
if "," in text:
w_index=text.find(",")
if text[w_index+2]=="W":
text=text.replace(",","")
elif text[w_index+1]!=" " and text[w_index-1]!=" ":
text=text.replace(","," With ")
else:
text=text.replace(","," With")
if "With" in text:
with_index=text.find("With")
if text[with_index+4]!=" ":
text=text[:with_index+4]+" "+text[with_index+4:]
if "-" in text:
text=text.replace("-"," ")
for i in list_of_symbols_tonull_out:
if i=="Type" and "(" not in text and ")" not in text:
continue
front_index=text.find("Front")
try:
if text[front_index+6]=="S":
text=text.replace("{}".format(i),"")
continue
except:
pass
if i=="(":
text=text.replace("{}".format(i)," ")
else:
text=text.replace("{}".format(i),"")
if "One" in text:
text=text.replace("One","")
if " " in text:
text=text.replace(" "," ")
if ";" in text:
text=text.replace(";"," With")
if "Adaptive Damp" in text:
for_index=text.find("Adaptive Damp")
bac_index=text.rfind("Adaptive Damp")
if for_index!=bac_index:
text=text[:for_index+17]
if "Damper" in text:
text=text.replace("Dampers","Damping")
if text[len(text)-1]==" ":
text=text[:len(text)-1]
if "Torison" in text:
text=text.replace("Torison","Torsion")
if "Link" in text and "Coil" in text:
link_index=text.find("Link")
if text[link_index+5]=="W":
text=text[:link_index+4]+text[link_index+9:]
if "Springs" in text:
text=text.replace("Springs","Spring")
if "Stablizer" in text:
text=text.replace("Stablizer","Stabilizer")
if "Stabilizer" in text:
if "Bar" not in text:
sta_index=text.find("Stabilizer")
text=text[:sta_index+10]+" Bar"+text[sta_index+10:]
if "Spring" in text:
sta_index=text.find("Spring")
text=text[:sta_index+6]+" With"+text[sta_index+10:]
if "Withh" in text:
text=text.replace("Withh","With")
if "Absorberf" in text:
text=text.replace("Absorberf","Absorber")
if "`" in text:
text=text.replace("`","")
if "Rear" not in text:
if "Suspension" not in text:
text=text+" Rear Suspension"
else:
find_=text.find("Suspension")
text=text[:find_]+"Rear "+text[find_:]
else:
text=text+" Suspension"
return text
In [76]:
txt="independent,multi-link(Adaptive Dampers)"
In [77]:
preprocess_Rear_Suspension(txt)
Out[77]:
'Independent With Multi Link Adaptive Damping Rear Suspension'
In [78]:
#Maruti
In [79]:
main.loc[main["Model"]=="Maruti"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[79]:
array(['Torsion Beam', nan, 'Torsion Beam & coil spring',
'Torsion Beam with Coil Spring', '3-Link Rigid Axle',
'Torsion Beam & Coil Spring', 'Leaf Spring Rigid Axle'],
dtype=object)
In [80]:
com=main.loc[main["Model"]=="Maruti"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [81]:
com.unique()
Out[81]:
array(['Torsion Beam Rear Suspension', nan,
'Torsion Beam With Coil Spring Rear Suspension',
'3 Link Rigid Axle Rear Suspension',
'Leaf Spring Rigid Axle Rear Suspension'], dtype=object)
In [82]:
#Ford
In [83]:
main.loc[main["Model"]=="Ford"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[83]:
array(['Semi-independent twist beam with twin gas and oil filled shock absorbers',
'Semi Independent Twist Beam, Coil Springs',
'Semi Independent Twist Beam',
'Heavy duty twist-beam with strut-type coil spring/damper units',
'Semi-independent twist beam',
'Semi Independent (Twist Beam Type)',
'Coil Spring with Anti Roll Bar', 'Twist Beam',
'Semi-independent twist beam with twin shock absorbers filled with gas & oil',
nan, 'Semi-Independent Twist Beam', 'Semi Independent',
'Integral Link Independent with Coil Springs & Stabilizer Bar',
'Semi-Independent heavy duty twist-beam with coil springs',
'Progressive Linear Rate Leaf Springs With Low Friction Pads',
'Semi-Independent Twist Beam With Twin Shock Absorbers Filled With Gas & Oil',
'Coil Spring, Watts Linkage Type with Anti-roll Bar',
'Progessive Linear Rate Leaf Springs With Low Friction Pads',
'Semi-independent Twist Beam With Twin Shock Absorbers Filled With Gas & Oil',
'Leaf Spring',
'Progressive linear rate leaf springs with low friction pads',
'Semi-Independent Twist Beam With Twin Shock Absorbers filled with gas & oil'],
dtype=object)
In [84]:
com=main.loc[main["Model"]=="Ford"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [85]:
com.unique()
Out[85]:
array(['Semi Independent Twist Beam With Twin Gas And Oil Filled Shock Absorbers Rear Suspension',
'Semi Independent Twist Beam With Coil Spring Rear Suspension',
'Semi Independent Twist Beam Rear Suspension',
'Heavy Duty Twist Beam With Strut Type Coil Spring/Damper Units Rear Suspension',
'Coil Spring With Anti Roll Bar Rear Suspension',
'Twist Beam Rear Suspension',
'Semi Independent Twist Beam With Twin Shock Absorbers Filled With Gas With Oil Rear Suspension',
nan, 'Semi Independent Rear Suspension',
'Integral Link Independent With Coil Spring With Stabilizer Bar Rear Suspension',
'Semi Independent Heavy Duty Twist Beam With Coil Spring Rear Suspension',
'Progressive Linear Rate Leaf Spring With Low Friction Pads Rear Suspension',
'Coil Spring Watts Linkage Type With Anti Roll Bar Rear Suspension',
'Progessive Linear Rate Leaf Spring With Low Friction Pads Rear Suspension',
'Leaf Spring Rear Suspension'], dtype=object)
In [86]:
#Audi
In [87]:
main.loc[main["Model"]=="Audi"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[87]:
array(['Air Suspension', '4-link', nan,
'Five-link front suspension; tubular anti-roll bar', 'RS Sports',
'Five-link front axle; tubular anti-roll bar; air spring suspension',
'Sport Adaptive Air Suspension', 'Adaptive Air Suspension',
'RS Adaptive Air Suspension', 'S Sports suspension'], dtype=object)
In [88]:
com=main.loc[main["Model"]=="Audi"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [89]:
com.unique()
Out[89]:
array(['Air Rear Suspension', '4 Link Rear Suspension', nan,
'Five Link Rear Suspension With Tubular Anti Roll Bar',
'Rs Sports Rear Suspension',
'Five Link Axle With Tubular Anti Roll Bar With Air Spring Rear Suspension',
'Sport Adaptive Air Rear Suspension',
'Adaptive Air Rear Suspension', 'Rs Adaptive Air Rear Suspension',
'S Sports Rear Suspension'], dtype=object)
In [90]:
#Bentley
In [91]:
main.loc[main["Model"]=="Bentley"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[91]:
array(['Air Suspension', 'Trapezoidak muliti-Link', 'Air suspension'],
dtype=object)
In [92]:
com=main.loc[main["Model"]=="Bentley"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [93]:
com.unique()
Out[93]:
array(['Air Rear Suspension', 'Trapezoidak Muliti Link Rear Suspension'],
dtype=object)
In [94]:
#Force
In [95]:
main.loc[main["Model"]=="Force"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[95]:
array(['Multi-link with Pan hard rod & Coil Spring'], dtype=object)
In [96]:
com=main.loc[main["Model"]=="Force"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [97]:
com.unique()
Out[97]:
array(['Multi Link Pan Hard Rod With Coil Spring Rear Suspension'],
dtype=object)
In [98]:
#Ferrari
In [99]:
main.loc[main["Model"]=="Ferrari"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[99]:
array([nan, 'independent,multi-link(Adaptive Dampers)',
'Magnetorheological damper', 'adaptive magnetic suspension',
'independent, multi-link(Adaptive Dampers)'], dtype=object)
In [100]:
com=main.loc[main["Model"]=="Ferrari"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [101]:
com.unique()
Out[101]:
array([nan,
'Independent With Multi Link Adaptive Damping Rear Suspension',
'Magnetorheological Damper Rear Suspension',
'Adaptive Magnetic Rear Suspension'], dtype=object)
In [102]:
#Honda
In [103]:
main.loc[main["Model"]=="Honda"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[103]:
array(['Torsion Beam Axle,Coil Spring', 'Torsion Beam with Coil Spring',
'Twisted Torsion Beam, Coil Spring', 'Torsion Bar, Coil Spring',
'Torsion beam with coil spring', 'Torsion Beam Axle, Coil Spring',
'Torsion Beam axle, Coil Spring'], dtype=object)
In [104]:
com=main.loc[main["Model"]=="Honda"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [105]:
com.unique()
Out[105]:
array(['Torsion Beam Axle With Coil Spring Rear Suspension',
'Torsion Beam With Coil Spring Rear Suspension',
'Twisted Torsion Beam With Coil Spring Rear Suspension',
'Torsion Bar With Coil Spring Rear Suspension'], dtype=object)
In [106]:
#Hyundai
In [107]:
main.loc[main["Model"]=="Hyundai"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[107]:
array(['Coupled torsion beam axle', 'Coupled Torsion Beam Axle',
'Coupled torsion beam axle with coil spring',
'Multi-link with coil spring'], dtype=object)
In [108]:
com=main.loc[main["Model"]=="Hyundai"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [109]:
com.unique()
Out[109]:
array(['Coupled Torsion Beam Axle Rear Suspension',
'Coupled Torsion Beam Axle With Coil Spring Rear Suspension',
'Multi Link Coil Spring Rear Suspension'], dtype=object)
In [110]:
#Isuzu
In [111]:
main.loc[main["Model"]=="Isuzu"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[111]:
array(['Penta-link coil suspension gas shock absorbers stabiliser bar',
'Soft ride,Leaf Spring', 'Semi-Elliptic Leaf Spring',
'Soft Ride, Leaf Spring'], dtype=object)
In [112]:
com=main.loc[main["Model"]=="Isuzu"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [113]:
com.unique()
Out[113]:
array(['Penta Link Coil Rear Suspension Gas Shock Absorbers Stabiliser Bar',
'Soft Ride With Leaf Spring Rear Suspension',
'Semi Elliptic Leaf Spring Rear Suspension'], dtype=object)
In [114]:
#Jeep
In [115]:
main.loc[main["Model"]=="Jeep"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[115]:
array(['Multi Link Suspension with Strut Assembly',
'Multi-Link with Strut Suspension with FSD, with Anti Roll Bar',
'heavy duty with gas shocks', nan, 'Multi Link Suspension'],
dtype=object)
In [116]:
com=main.loc[main["Model"]=="Jeep"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [117]:
com.unique()
Out[117]:
array(['Multi Link Rear Suspension With Strut Assembly',
'Multi Link With Strut Rear Suspension With Fsd With Anti Roll Bar',
'Heavy Duty With Gas Shocks Rear Suspension', nan,
'Multi Link Rear Suspension'], dtype=object)
In [118]:
#Kia
In [119]:
main.loc[main["Model"]=="Kia"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[119]:
array(['Coupled Torsion Beam Axle with Coil Spring',
'Coupled Torsion Beam Axle', 'Multi Link', 'Multi-Link'],
dtype=object)
In [120]:
com=main.loc[main["Model"]=="Kia"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [121]:
com.unique()
Out[121]:
array(['Coupled Torsion Beam Axle With Coil Spring Rear Suspension',
'Coupled Torsion Beam Axle Rear Suspension',
'Multi Link Rear Suspension'], dtype=object)
In [122]:
#Lamborghini
In [123]:
main.loc[main["Model"]=="Lamborghini"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[123]:
array([nan, 'Aluminum double-wishbone suspension',
'Push rod magneto-rheologic active with horizontal dampers',
'adaptive air suspension'], dtype=object)
In [124]:
com=main.loc[main["Model"]=="Lamborghini"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [125]:
com.unique()
Out[125]:
array([nan, 'Aluminum Double Wishbone Rear Suspension',
'Push Rod Magneto Rheologic Active With Horizontal Dampers Rear Suspension',
'Adaptive Air Rear Suspension'], dtype=object)
In [126]:
#Land_Rover
In [127]:
main.loc[main["Model"]=="Land_Rover"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[127]:
array(['Multi-link', nan, 'Electronic Air Suspension',
'Integral Coil Spring'], dtype=object)
In [128]:
com=main.loc[main["Model"]=="Land_Rover"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [129]:
com.unique()
Out[129]:
array(['Multi Link Rear Suspension', nan,
'Electronic Air Rear Suspension',
'Integral Coil Spring Rear Suspension'], dtype=object)
In [130]:
#Lexus
In [131]:
main.loc[main["Model"]=="Lexus"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[131]:
array(['Double-wishbone', 'multi-link suspension',
'AIR Adaptive Variable Suspension', 'Double Wishbone Suspension',
'Double Wishbone', '4-link Type with Coil Springs'], dtype=object)
In [132]:
com=main.loc[main["Model"]=="Lexus"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [133]:
com.unique()
Out[133]:
array(['Double Wishbone Rear Suspension', 'Multi Link Rear Suspension',
'Air Adaptive Variable Rear Suspension',
'4 Link Type With Coil Spring Rear Suspension'], dtype=object)
In [134]:
#Mahindra
In [135]:
main.loc[main["Model"]=="Mahindra"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[135]:
array(['Twist beam suspension with Coil Spring',
'Pentalink Suspension with WATT’s Linkage with FDD & MTV-CL',
'5 Link Rear Suspension with Coil Spring', 'Rigid leaf Spring',
'Multi-Link Independent Suspension with FSD Stabilizer bar',
'Semi-independent Twist Beam with Coil Spring',
'Multilink Solid Rear Axle with Coil Over Damper & Stabiliser Bar',
nan, 'Rigid axle with leaf spring',
'H-Section Torsion Beam with Coil Spring',
'Multi Link Coil Spring Suspension and Anti-roll Bar',
'Twist Beam'], dtype=object)
In [136]:
com=main.loc[main["Model"]=="Mahindra"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [137]:
com.unique()
Out[137]:
array(['Twist Beam Rear Suspension With Coil Spring',
'Pentalink Rear Suspension With Watt’S Linkage With Fdd With Mtv Cl',
'5 Link Rear Suspension With Coil Spring Suspension',
'Rigid Leaf Spring Rear Suspension',
'Multi Link Independent Rear Suspension With Fsd Stabilizer Bar',
'Semi Independent Twist Beam With Coil Spring Rear Suspension',
'Multilink Solid Rear Axle With Coil Over Damper With Stabiliser Bar Suspension',
nan, 'Rigid Axle With Leaf Spring Rear Suspension',
'H Section Torsion Beam With Coil Spring Rear Suspension',
'Multi Link Coil Spring Rear Suspension And Anti Roll Bar',
'Twist Beam Rear Suspension'], dtype=object)
In [138]:
#Aston_Martin
In [139]:
main.loc[main["Model"]=="Aston_Martin"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[139]:
array(['multi-link, coil springs, anti-roll bar and adaptive damping Adaptive Damping System',
'Multi-link, coil springs, anti-roll bar and adaptive dampers Adaptive Damping System',
'Multi-link'], dtype=object)
In [140]:
com=main.loc[main["Model"]=="Aston_Martin"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [141]:
com.unique()
Out[141]:
array(['Multi Link Coil Spring With Anti Roll Bar And Adaptive Damping Rear Suspension',
'Multi Link Rear Suspension'], dtype=object)
In [142]:
#Maserati
In [143]:
main.loc[main["Model"]=="Maserati"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[143]:
array(['Air Adaptive Suspensions', 'Five-Arm Multilink', nan,
'Quattroporte Sport GT S is fitted with the single-setting racing-style suspension system'],
dtype=object)
In [144]:
com=main.loc[main["Model"]=="Maserati"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [145]:
com.unique()
Out[145]:
array(['Air Adaptive Rear Suspensions',
'Five Arm Multilink Rear Suspension', nan,
'Quattroporte Sport Gt S Is Fitted With The Single Setting Racing Style Rear Suspension System'],
dtype=object)
In [146]:
#Mercedes-Benz
In [147]:
main.loc[main["Model"]=="Mercedes-Benz"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[147]:
array(['AIRMATIC', 'Air Suspension', 'AMG Suspension', nan,
'AMG RIDE CONTROL sports suspension', 'Adaptive Damping System',
'AMG RIDE CONTROL+', 'five-link multi-link independent suspension',
'SUSPENSION WITH ADAPTIVE DAMPING SYSTEM', 'ADAPTIVE DAMPING',
'AMG RIDE CONTROL suspension', 'air suspension',
'active roll stabilization intelligent suspension',
'AIRMATIC suspension', 'Agility control',
'Adaptive Air Suspension', 'AMG RIDE CONTROL',
'DYNAMIC BODY CONTROL suspension', 'Coil spring',
'Rigid Leaf Spring', 'adaptive damping Suspension'], dtype=object)
In [148]:
com=main.loc[main["Model"]=="Mercedes-Benz"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [149]:
com.unique()
Out[149]:
array(['Airmatic Rear Suspension', 'Air Rear Suspension',
'Amg Rear Suspension', nan,
'Amg Ride Control Sports Rear Suspension',
'Adaptive Damping System Rear Suspension',
'Amg Ride Control+ Rear Suspension',
'Five Link Multi Link Independent Rear Suspension',
'Rear Suspension With Adaptive Damping System',
'Adaptive Damping Rear Suspension',
'Amg Ride Control Rear Suspension',
'Active Roll Stabilization Intelligent Rear Suspension',
'Agility Control Rear Suspension', 'Adaptive Air Rear Suspension',
'Dynamic Body Control Rear Suspension',
'Coil Spring Rear Suspension', 'Rigid Leaf Spring Rear Suspension'],
dtype=object)
In [150]:
#MG
In [151]:
main.loc[main["Model"]=="MG"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[151]:
array(['Semi Independent Helical Spring Torison Beam',
'Semi Independent Helical Spring Torsion Beam',
'Five Link Integral Suspension', 'Torsion Beam'], dtype=object)
In [152]:
com=main.loc[main["Model"]=="MG"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [153]:
com.unique()
Out[153]:
array(['Semi Independent Helical Spring Torsion Beam Rear Suspension',
'Five Link Integral Rear Suspension',
'Torsion Beam Rear Suspension'], dtype=object)
In [154]:
#Mini
In [155]:
main.loc[main["Model"]=="Mini"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[155]:
array(['multiple control-arm rear axle', 'Sport Suspension', 'Multi Link',
nan, 'Multiple-Control-Arm'], dtype=object)
In [156]:
com=main.loc[main["Model"]=="Mini"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [157]:
com.unique()
Out[157]:
array(['Multiple Control Arm Rear Axle Suspension',
'Sport Rear Suspension', 'Multi Link Rear Suspension', nan,
'Multiple Control Arm Rear Suspension'], dtype=object)
In [158]:
#Mitsubishi
In [159]:
main.loc[main["Model"]=="Mitsubishi"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[159]:
array(['Independent Multi-link with Stabilizer bar',
'Multi-Link Coil Spring with Stablizer Bar',
'3 Link coil spring rigid axle with stabiliser bar',
'Multi-link coil springs with stabilizer bar',
'Independent Multi-link with stabilizer bar', nan,
'Multi-Link With Bilstein Shock Absorbers,Eibach Springs & Stabilizer Bar',
'Multi-Link Coil Springs With Stabilizer', 'Multi-Link', '3 Link',
'Multi link with coil springs and stabilizer bar',
'3 Link Coil Spring Suspension'], dtype=object)
In [160]:
com=main.loc[main["Model"]=="Mitsubishi"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [161]:
com.unique()
Out[161]:
array(['Independent Multi Link With Stabilizer Bar Rear Suspension',
'Multi Link Coil Spring With Stabilizer Bar Rear Suspension',
'3 Link Coil Spring Rigid Axle With Stabiliser Bar Rear Suspension',
nan,
'Multi Link With Bilstein Shock Absorbers With Eibach Spring With Stabilizer Bar Rear Suspension',
'Multi Link Rear Suspension', '3 Link Rear Suspension',
'3 Link Coil Spring Rear Suspension'], dtype=object)
In [162]:
#Nissan
In [163]:
main.loc[main["Model"]=="Nissan"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[163]:
array(['Twin tube telescopic shock absorber',
'Torsion Beam with Coil Springs', 'Multi Link'], dtype=object)
In [164]:
com=main.loc[main["Model"]=="Nissan"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [165]:
com.unique()
Out[165]:
array(['Twin Tube Telescopic Shock Absorber Rear Suspension',
'Torsion Beam With Coil Spring Rear Suspension',
'Multi Link Rear Suspension'], dtype=object)
In [166]:
#Porsche
In [167]:
main.loc[main["Model"]=="Porsche"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[167]:
array(['Aluminum multi-link axle with subframe, independent wheel suspension',
'Multi-Link', 'Adaptive air suspension', 'Active Suspension',
'spring-strut suspension', 'Aluminium multi-link rear axle',
'Self-Tracking Trapezoidal Link',
'Lightweight spring-strut suspension', nan], dtype=object)
In [168]:
com=main.loc[main["Model"]=="Porsche"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [169]:
com.unique()
Out[169]:
array(['Aluminum Multi Link Axle With Subframe With Independent Wheel Rear Suspension',
'Multi Link Rear Suspension', 'Adaptive Air Rear Suspension',
'Active Rear Suspension', 'Spring Strut Rear Suspension',
'Aluminium Multi Link Rear Axle Suspension',
'Self Tracking Trapezoidal Link Rear Suspension',
'Lightweight Spring Strut Rear Suspension', nan], dtype=object)
In [170]:
#Renault
In [171]:
main.loc[main["Model"]=="Renault"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[171]:
array(['Twist beam suspension with coil spring', 'Torsion beam axle'],
dtype=object)
In [172]:
com=main.loc[main["Model"]=="Renault"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [173]:
com.unique()
Out[173]:
array(['Twist Beam Rear Suspension With Coil Spring',
'Torsion Beam Axle Rear Suspension'], dtype=object)
In [174]:
#Rolls-Royce
In [175]:
main.loc[main["Model"]=="Rolls-Royce"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[175]:
array(['Multi Link', nan, 'multi-link rear axle'], dtype=object)
In [176]:
com=main.loc[main["Model"]=="Rolls-Royce"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [177]:
com.unique()
Out[177]:
array(['Multi Link Rear Suspension', nan,
'Multi Link Rear Axle Suspension'], dtype=object)
In [178]:
#Skoda
In [179]:
main.loc[main["Model"]=="Skoda"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[179]:
array(['Twist Beam Axle',
'Multi-element axle, with longitudinal and transverse links, with torsion stabiliser',
'Multilink suspension, one longitudinal and three transverse arms',
'Multi-element axle, with one longitudinal and transverse links, with torsion stabiliser'],
dtype=object)
In [180]:
com=main.loc[main["Model"]=="Skoda"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [181]:
com.unique()
Out[181]:
array(['Twist Beam Axle Rear Suspension',
'Multi Element Axle With Longitudinal And Transverse Links With Torsion Stabiliser Rear Suspension',
'Multilink Rear Suspension With Longitudinal And Three Transverse Arms'],
dtype=object)
In [182]:
#Tata
In [183]:
main.loc[main["Model"]=="Tata"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[183]:
array(['Semi Independent Twist Blade with Panhard Rod & Coil Spring',
'Semi-Independent closed profile Twist beam with Coil Spring and shock absorber',
'Twist Beam with Coil Spring',
'Semi-independent Twist Beam With Coil Spring And Shock Absorber',
'Twist beam with coil spring and shock absorber',
'Semi-independent; Rear Twist Beam with Dual path Strut',
'Semi Independent Twist Blade with Panhard Rod and Coil Spring',
'Twist beam with dual path Strut',
'Twist beam with dual path strut',
'Twist Beam with Coil Spring and Shock Absorber',
'Innovative Two-stage semi-elliptical leaf springs',
'Semi-independent Closed Profile Twist Beam with Dual Path Strut',
'Semi-Independent Closed Profile Twist Beam with Dual Path Strut',
'Innovative Two-stage Semi-elliptical leaf springs-7leaves',
'Rear Twist Beam with Coil Spring',
'Twist Beam with Coil Spring and Shock Absorberf', nan],
dtype=object)
In [184]:
com=main.loc[main["Model"]=="Tata"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [185]:
com.unique()
Out[185]:
array(['Semi Independent Twist Blade With Panhard Rod With Coil Spring Rear Suspension',
'Semi Independent Closed Profile Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
'Twist Beam With Coil Spring Rear Suspension',
'Semi Independent Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
'Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
'Semi Independent With Rear Twist Beam With Dual Path Strut Suspension',
'Semi Independent Twist Blade With Panhard Rod And Coil Spring Rear Suspension',
'Twist Beam With Dual Path Strut Rear Suspension',
'Innovative Two Stage Semi Elliptical Leaf Spring Rear Suspension',
'Semi Independent Closed Profile Twist Beam With Dual Path Strut Rear Suspension',
'Innovative Two Stage Semi Elliptical Leaf Spring 7Leaves Rear Suspension',
'Rear Twist Beam With Coil Spring Suspension', nan], dtype=object)
In [186]:
#Toyota
In [187]:
main.loc[main["Model"]=="Toyota"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[187]:
array(['4-Link With Coil Spring', 'leaf spring',
'4-Link with Coil Spring', 'Torsion Beam', nan, 'Double Wishbone'],
dtype=object)
In [188]:
com=main.loc[main["Model"]=="Toyota"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [189]:
com.unique()
Out[189]:
array(['4 Link Coil Spring Rear Suspension',
'Leaf Spring Rear Suspension', 'Torsion Beam Rear Suspension', nan,
'Double Wishbone Rear Suspension'], dtype=object)
In [190]:
#Volkswagen
In [191]:
main.loc[main["Model"]=="Volkswagen"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[191]:
array(['Twist beam axle', 'Semi Indpendent Trailing Arm',
'Twist beam axle`', 'Independent suspension by four-link axle'],
dtype=object)
In [192]:
com=main.loc[main["Model"]=="Volkswagen"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [193]:
com.unique()
Out[193]:
array(['Twist Beam Axle Rear Suspension',
'Semi Indpendent Trailing Arm Rear Suspension',
'Independent Rear Suspension By Four Link Axle'], dtype=object)
In [194]:
#Volvo
In [195]:
main.loc[main["Model"]=="Volvo"][["Rear Suspension"]]["Rear Suspension"].unique()
Out[195]:
array(['Air', nan, 'Multi Link', 'sophisticated Suspension'], dtype=object)
In [196]:
com=main.loc[main["Model"]=="Volvo"]["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [197]:
com.unique()
Out[197]:
array(['Air Rear Suspension', nan, 'Multi Link Rear Suspension',
'Sophisticated Rear Suspension'], dtype=object)
In [198]:
#Apply this function to all brands
main["Rear Suspension"]=main["Rear Suspension"].apply(preprocess_Rear_Suspension)
In [199]:
#preprocessed rear suspension
len(main["Rear Suspension"].unique())
Out[199]:
128
In [200]:
#Unpreprocessed rear suspension
len(main_data["Rear Suspension"].unique())
Out[200]:
180
In [201]:
main["Rear Suspension"].unique()
Out[201]:
array(['Torsion Beam Rear Suspension', nan,
'Torsion Beam With Coil Spring Rear Suspension',
'3 Link Rigid Axle Rear Suspension',
'Leaf Spring Rigid Axle Rear Suspension',
'Semi Independent Twist Beam With Twin Gas And Oil Filled Shock Absorbers Rear Suspension',
'Semi Independent Twist Beam With Coil Spring Rear Suspension',
'Semi Independent Twist Beam Rear Suspension',
'Heavy Duty Twist Beam With Strut Type Coil Spring/Damper Units Rear Suspension',
'Coil Spring With Anti Roll Bar Rear Suspension',
'Twist Beam Rear Suspension',
'Semi Independent Twist Beam With Twin Shock Absorbers Filled With Gas With Oil Rear Suspension',
'Semi Independent Rear Suspension',
'Integral Link Independent With Coil Spring With Stabilizer Bar Rear Suspension',
'Semi Independent Heavy Duty Twist Beam With Coil Spring Rear Suspension',
'Progressive Linear Rate Leaf Spring With Low Friction Pads Rear Suspension',
'Coil Spring Watts Linkage Type With Anti Roll Bar Rear Suspension',
'Progessive Linear Rate Leaf Spring With Low Friction Pads Rear Suspension',
'Leaf Spring Rear Suspension', 'Air Rear Suspension',
'4 Link Rear Suspension',
'Five Link Rear Suspension With Tubular Anti Roll Bar',
'Rs Sports Rear Suspension',
'Five Link Axle With Tubular Anti Roll Bar With Air Spring Rear Suspension',
'Sport Adaptive Air Rear Suspension',
'Adaptive Air Rear Suspension', 'Rs Adaptive Air Rear Suspension',
'S Sports Rear Suspension',
'Trapezoidak Muliti Link Rear Suspension',
'Adaptive 2 Axle Air Rear Suspension',
'Adaptive M Specific Rear Suspension',
'Adaptive M Rear Suspension', 'Five Arm Rear Suspension',
'Dynamic Damper Control Rear Suspension',
'M Sport Rear Suspension', 'Independent Damping Rear Suspension',
'Airmatic Rear Suspension',
'Adaptive Rear Suspension With Variable Shock Absorber',
'Adaptive Rear Suspension',
'Independent Double Wishbones Rear Suspension',
'Multi Link Pan Hard Rod With Coil Spring Rear Suspension',
'Independent With Multi Link Adaptive Damping Rear Suspension',
'Magnetorheological Damper Rear Suspension',
'Adaptive Magnetic Rear Suspension',
'Torsion Beam Axle With Coil Spring Rear Suspension',
'Twisted Torsion Beam With Coil Spring Rear Suspension',
'Torsion Bar With Coil Spring Rear Suspension',
'Coupled Torsion Beam Axle Rear Suspension',
'Coupled Torsion Beam Axle With Coil Spring Rear Suspension',
'Multi Link Coil Spring Rear Suspension',
'Penta Link Coil Rear Suspension Gas Shock Absorbers Stabiliser Bar',
'Soft Ride With Leaf Spring Rear Suspension',
'Semi Elliptic Leaf Spring Rear Suspension',
'Multi Link Rear Suspension With Strut Assembly',
'Multi Link With Strut Rear Suspension With Fsd With Anti Roll Bar',
'Heavy Duty With Gas Shocks Rear Suspension',
'Multi Link Rear Suspension',
'Aluminum Double Wishbone Rear Suspension',
'Push Rod Magneto Rheologic Active With Horizontal Dampers Rear Suspension',
'Electronic Air Rear Suspension',
'Integral Coil Spring Rear Suspension',
'Double Wishbone Rear Suspension',
'Air Adaptive Variable Rear Suspension',
'4 Link Type With Coil Spring Rear Suspension',
'Twist Beam Rear Suspension With Coil Spring',
'Pentalink Rear Suspension With Watt’S Linkage With Fdd With Mtv Cl',
'5 Link Rear Suspension With Coil Spring Suspension',
'Rigid Leaf Spring Rear Suspension',
'Multi Link Independent Rear Suspension With Fsd Stabilizer Bar',
'Multilink Solid Rear Axle With Coil Over Damper With Stabiliser Bar Suspension',
'Rigid Axle With Leaf Spring Rear Suspension',
'H Section Torsion Beam With Coil Spring Rear Suspension',
'Multi Link Coil Spring Rear Suspension And Anti Roll Bar',
'Multi Link Coil Spring With Anti Roll Bar And Adaptive Damping Rear Suspension',
'Air Adaptive Rear Suspensions',
'Five Arm Multilink Rear Suspension',
'Quattroporte Sport Gt S Is Fitted With The Single Setting Racing Style Rear Suspension System',
'Amg Rear Suspension', 'Amg Ride Control Sports Rear Suspension',
'Adaptive Damping System Rear Suspension',
'Amg Ride Control+ Rear Suspension',
'Five Link Multi Link Independent Rear Suspension',
'Rear Suspension With Adaptive Damping System',
'Adaptive Damping Rear Suspension',
'Amg Ride Control Rear Suspension',
'Active Roll Stabilization Intelligent Rear Suspension',
'Agility Control Rear Suspension',
'Dynamic Body Control Rear Suspension',
'Coil Spring Rear Suspension',
'Semi Independent Helical Spring Torsion Beam Rear Suspension',
'Five Link Integral Rear Suspension',
'Multiple Control Arm Rear Axle Suspension',
'Sport Rear Suspension', 'Multiple Control Arm Rear Suspension',
'Independent Multi Link With Stabilizer Bar Rear Suspension',
'Multi Link Coil Spring With Stabilizer Bar Rear Suspension',
'3 Link Coil Spring Rigid Axle With Stabiliser Bar Rear Suspension',
'Multi Link With Bilstein Shock Absorbers With Eibach Spring With Stabilizer Bar Rear Suspension',
'3 Link Rear Suspension', '3 Link Coil Spring Rear Suspension',
'Twin Tube Telescopic Shock Absorber Rear Suspension',
'Aluminum Multi Link Axle With Subframe With Independent Wheel Rear Suspension',
'Active Rear Suspension', 'Spring Strut Rear Suspension',
'Aluminium Multi Link Rear Axle Suspension',
'Self Tracking Trapezoidal Link Rear Suspension',
'Lightweight Spring Strut Rear Suspension',
'Torsion Beam Axle Rear Suspension',
'Multi Link Rear Axle Suspension',
'Twist Beam Axle Rear Suspension',
'Multi Element Axle With Longitudinal And Transverse Links With Torsion Stabiliser Rear Suspension',
'Multilink Rear Suspension With Longitudinal And Three Transverse Arms',
'Semi Independent Twist Blade With Panhard Rod With Coil Spring Rear Suspension',
'Semi Independent Closed Profile Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
'Twist Beam With Coil Spring Rear Suspension',
'Semi Independent Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
'Twist Beam With Coil Spring And Shock Absorber Rear Suspension',
'Semi Independent With Rear Twist Beam With Dual Path Strut Suspension',
'Semi Independent Twist Blade With Panhard Rod And Coil Spring Rear Suspension',
'Twist Beam With Dual Path Strut Rear Suspension',
'Innovative Two Stage Semi Elliptical Leaf Spring Rear Suspension',
'Semi Independent Closed Profile Twist Beam With Dual Path Strut Rear Suspension',
'Innovative Two Stage Semi Elliptical Leaf Spring 7Leaves Rear Suspension',
'Rear Twist Beam With Coil Spring Suspension',
'4 Link Coil Spring Rear Suspension',
'Semi Indpendent Trailing Arm Rear Suspension',
'Independent Rear Suspension By Four Link Axle',
'Sophisticated Rear Suspension'], dtype=object)
In [202]:
main.columns[36]
Out[202]:
'Petrol Mileage (ARAI)'
In [203]:
main["Steering Type"]
Out[203]:
0 Electric
1 Manual
...
548 NaN
549 NaN
Name: Steering Type, Length: 160031, dtype: object
In [204]:
main["Steering Type"].unique()
Out[204]:
array(['Electric', 'Manual', nan, 'Power', 'MT', 'EPAS', 'Electrical',
'electric', 'power', 'Motor', 'Electro', 'Direct', 'Hydraulic',
'Electronic'], dtype=object)
In [205]:
def preprocess_Steering_Type(text):
text=str(text)
if text=="nan":
return np.nan
text=text.title()
if text=="Electrical":
text=text.replace("Electrical","Electric")
if text=="Electro":
text=text.replace("Electro","Electric")
if text=="Electronic":
text=text.replace("Electronic","Electric")
return text
In [206]:
main["Steering Type"]=main["Steering Type"].apply(preprocess_Steering_Type)
In [207]:
main["Steering Type"].unique()
Out[207]:
array(['Electric', 'Manual', nan, 'Power', 'Mt', 'Epas', 'Motor',
'Direct', 'Hydraulic'], dtype=object)
In [208]:
main.columns[37]
Out[208]:
'Petrol Fuel Tank Capacity (Litres)'
In [209]:
main["Steering Column"]
Out[209]:
0 Tilt
1 NaN
...
548 NaN
549 NaN
Name: Steering Column, Length: 160031, dtype: object
In [210]:
main["Steering Column"].unique()
Out[210]:
array(['Tilt', nan, 'Tilt & Telescopic', 'Collapsible',
'Tilt and Telescopic', 'Tilt & Collapsible', 'Tilt Adjustable',
'Tilt adjust', 'Tilt And Collapsible', 'Tilt Adjustuble',
'Tilt Steering', 'Tilt adjustuble', 'Adjustable', 'Height & Reach',
'Tilt & Reach Adjustment', 'Tilt&Telescope', 'Adjustable Steering',
'Tiltable & Telescopic', 'Telescopic & Tilt', 'Tilt & telescopic',
'Collapsible Steering', 'rick and pin', 'Power',
'Tilt and Collapsible', 'Height & Reach Adjustment',
'tilt and telescopic', 'Electrical Adjustable Steering',
'Electrically Adjustable', 'Tilt Adjustable Steering',
'Hydraulic Variable Power Assis', 'Tilt & Collapsible Steering',
'Collapsible Steering Column', 'Low Tilt Steering',
'Rack & Pinion', 'Electric Power Steering', 'Tilt & Adjustable'],
dtype=object)
In [211]:
def preprocess_Steering_Column(text):
text=str(text)
if text=="nan":
return np.nan
#One Exception case
if "Telescopic & Tilt" in text:
return "Tilt And Telescopic"
text=text.title()
if "Steering" in text:
text=text.replace("Steering","")
if "Column" in text:
text=text.replace("Column","")
if " " in text:
text=text.replace(" "," ")
if text[len(text)-1]==" ":
text=text[:len(text)-1]
if "Tilt" in text and (len(text)==4 or "Low" in text):
text=text.replace("Tilt","Tiltable")
else:
text=text.replace("Tiltable","Tilt")
if "&" in text:
index=text.find("&")
if text[index-1]!=" " and text[index+1]!=" ":
text=text.replace("&"," And ")
else:
text=text.replace("&","And")
if "Adjust" in text:
ind=text.find('Adjust')
try:
text[ind+6]
except:
text=text.replace("Adjust","Adjustable")
if "Adjustuble" in text:
text=text.replace("Adjustuble","Adjustable")
if "Tilt" in text and "Adjustable" in text and "And" not in text:
tilt_index=text.find("Tilt")
text=text[:tilt_index+4]+" And"+text[tilt_index+4:]
if "Telescope" in text:
text=text.replace("Telescope","Telescopic")
if "Electrically" in text:
text=text.replace("Electrically","Electrical")
return text
In [212]:
main["Steering Column"]=main["Steering Column"].apply(preprocess_Steering_Column)
In [213]:
main["Steering Column"].unique()
Out[213]:
array(['Tiltable', nan, 'Tilt And Telescopic', 'Collapsible',
'Tilt And Collapsible', 'Tilt And Adjustable', 'Adjustable',
'Height And Reach', 'Tilt And Reach Adjustment', 'Rick And Pin',
'Power', 'Height And Reach Adjustment', 'Electrical Adjustable',
'Hydraulic Variable Power Assis', 'Low Tiltable',
'Rack And Pinion', 'Electric Power'], dtype=object)
In [214]:
main.columns[38]
Out[214]:
'Emission Norm Compliance'
In [215]:
main["Front Brake Type"]
Out[215]:
0 Disc
1 Disc
...
548 NaN
549 NaN
Name: Front Brake Type, Length: 160031, dtype: object
In [216]:
main["Front Brake Type"].unique()
Out[216]:
array(['Disc', 'Ventilated Disc', nan, 'Ventilated Discs',
'Ventillated Discs', 'Ventillated Disc', 'ceramic Ventilated Disc',
'Ceramic Disc', 'Vented Discs', 'ventilated disc', 'VeDi',
'carbon ceramic brakes', 'Ventilated Disc with Twin Pot Caliper',
'disc', 'Carbon Ceramic Brake', 'Carbon ceramic',
'Twin piston sliding fist caliper', 'Ventilated discs',
'Ventilated two piece steel brake discs', 'Ventilated steel discs',
'Vantilated Disc', 'Discs', 'Aluminium monobloc Disc',
'ventilated Disc'], dtype=object)
In [217]:
def preprocess_Front_Break_Type(text):
text=str(text)
if text=="nan":
return np.nan
text=text.title()
if "Ventillated" in text:
text=text.replace("Ventillated","Ventilated")
if "Vantilated" in text:
text=text.replace("Vantilated","Ventilated")
if "Discs" in text:
text=text.replace("Discs","Disc")
if "Brakes" in text:
text=text.replace("Brakes","")
if "Brake" in text:
text=text.replace("Brake","")
if " " in text:
text=text.replace(" "," ")
if text[len(text)-1]==" ":
text=text[:len(text)-1]
return text
In [218]:
main["Front Brake Type"]=main["Front Brake Type"].apply(preprocess_Front_Break_Type)
In [219]:
main["Front Brake Type"].unique()
Out[219]:
array(['Disc', 'Ventilated Disc', nan, 'Ceramic Ventilated Disc',
'Ceramic Disc', 'Vented Disc', 'Vedi', 'Carbon Ceramic',
'Ventilated Disc With Twin Pot Caliper',
'Twin Piston Sliding Fist Caliper',
'Ventilated Two Piece Steel Disc', 'Ventilated Steel Disc',
'Aluminium Monobloc Disc'], dtype=object)
In [220]:
main.columns[39]
Out[220]:
'Front Suspension'
In [221]:
main["Rear Brake Type"]
Out[221]:
0 Drum
1 Drum
...
548 NaN
549 NaN
Name: Rear Brake Type, Length: 160031, dtype: object
In [222]:
main["Rear Brake Type"].unique()
Out[222]:
array(['Drum', 'Solid Disc', nan, 'Disc', 'Self adjusting Drums',
'Self Adjusting Drum', 'Self Adjusting Drums',
'Self-Adjusting Drum', 'Ventilated Disc',
'ceramic Ventilated Disc', 'Ceramic Disc', 'Vented Discs',
'ventilated disc', 'VeDi-S-ABS', 'carbon ceramic brakes', 'disc',
'carbon ceramic brakes.', 'Carbon Ceramic Brake', 'Carbon ceramic',
'Single piston sliding fist', 'Ventilated discs',
'Ventilated Discs', 'drum',
'Ventilated two piece steel brake discs', 'Ventilated steel discs',
'Drum in Discs', 'Disc & Drum', 'Drum in disc', 'Ventilated Drum',
'Ventilated Drum In Discs', 'Discs', 'Aluminium monobloc Disc',
'Drum`'], dtype=object)
In [223]:
def preprocess_Rear_Brake_Type(text):
list_elements_to_null_out=[".","Brakes","Brake","`"]
text=str(text)
if text=="nan":
return np.nan
#one exception case
if "Disc & Drum" in text:
return "Drum & Disc"
text=text.title()
if "In" in text:
text=text.replace("In","&")
if "Drums" in text:
text=text.replace("Drums","Drum")
if "Discs" in text:
text=text.replace("Discs","Disc")
if "-" in text:
text=text.replace("-"," ")
for i in list_elements_to_null_out:
text=text.replace("{}".format(i),"")
if " " in text:
text=text.replace(" "," ")
if text[len(text)-1]==" ":
text=text[:len(text)-1]
return text
In [224]:
main["Rear Brake Type"]=main["Rear Brake Type"].apply(preprocess_Rear_Brake_Type)
In [225]:
main["Rear Brake Type"].unique()
Out[225]:
array(['Drum', 'Solid Disc', nan, 'Disc', 'Self Adjusting Drum',
'Ventilated Disc', 'Ceramic Ventilated Disc', 'Ceramic Disc',
'Vented Disc', 'Vedi S Abs', 'Carbon Ceramic',
'Single Piston Sliding Fist', 'Ventilated Two Piece Steel Disc',
'Ventilated Steel Disc', 'Drum & Disc', 'Ventilated Drum',
'Ventilated Drum & Disc', 'Aluminium Monobloc Disc'], dtype=object)
In [226]:
main.columns[40]
Out[226]:
'Rear Suspension'
In [227]:
main["Length (mm)"]
Out[227]:
0 3995.0
1 3675.0
...
548 NaN
549 NaN
Name: Length (mm), Length: 160031, dtype: float64
In [228]:
main.columns[41]
Out[228]:
'Steering Type'
In [229]:
main['Width (mm)']
Out[229]:
0 1735.0
1 1475.0
...
548 NaN
549 NaN
Name: Width (mm), Length: 160031, dtype: float64
In [230]:
main.columns[42]
Out[230]:
'Steering Column'
In [231]:
main["Height (mm)"]
Out[231]:
0 1515.0
1 1825.0
...
548 NaN
549 NaN
Name: Height (mm), Length: 160031, dtype: object
In [232]:
main["Height (mm)"].unique()
Out[232]:
array([1515.0, 1825.0, 1595.0, 1553.0, nan, 1485.0, 1530.0, 1475.0,
1755.0, 1500.0, 1555.0, 1690.0, 1520.0, 1567.0, 1685.0, 1675.0,
1837.0, 1427.0, 1647.0, 1525.0, 1703.0, 1418.0, 1433.0, 1686.0,
1488.0, 1673.0, 1652.0, 1745.0, 1304.0, 1598.0, 1420.0, 1695.0,
1469.0, 1448.0, 1497.0, 1667.0, 1676.0, 1212.0, 2075, 1318.0,
1187.0, 1206.0, 1276.0, 1186.0, '1544', '1489', '1601',
'1498-1501', '1495', 1635.0, 1560.0, 1617.0, 1665.0, 1505.0, 1860,
1840, 1790, 1640.0, 1698.0, 1838.0, 1848.0, 1708.0, 1645.0, 1642.0,
1550.0, 1180.0, 1165.0, 1888.0, 1724.0, 1857.0, 1627.0, 1845.0,
1655.0, 1844.0, 1855.0, 1995.0, 1774.0, 1300.0, 1495.0, 1624.0,
1400.0, 1446.0, 1510.0, 1644.0, 1720.0, 1411.0, 1447.0, 1588.0,
1437.0, 1503.0, 1716.0, 1823.0, 1909.0, 1402.0, 1585.0, 1611.0,
1880.0, 1901.0, 1518.0, 1760.0, 1867.0, 1650.0, 1649.0, 1710.0,
1279.0, 1395.0, 1636.0, 1299.0, 1605, 1643, 1490, 1552.0, 1835.0,
1612.0, 1706.0, 1606.0, 1535.0, 1615.0, 1537.0, 1786.0, 1616.0,
1532.0, 1523.0, 1810.0, 1815.0, 1795.0, 1443.0, 1431.0],
dtype=object)
In [233]:
def preprocess_Height(text):
if type(text)==str:
lists=text.split("-")
if len(lists)==1:
return float(text)
else:
first_=float(lists[0])
second_=float(lists[1])
avg_height=(first_+second_)/2
return avg_height
return text
In [234]:
main["Height (mm)"]=main["Height (mm)"].apply(preprocess_Height)
In [235]:
main["Height (mm)"]
Out[235]:
0 1515.0
1 1825.0
...
548 NaN
549 NaN
Name: Height (mm), Length: 160031, dtype: float64
In [236]:
main["Boot Space (Litres)"]
Out[236]:
0 378.0
1 NaN
...
548 NaN
549 NaN
Name: Boot Space (Litres), Length: 160031, dtype: object
In [237]:
main["Boot Space (Litres)"].unique()
Out[237]:
array([378.0, nan, 375.0, 313.0, 510.0, 268.0, 260.0, 209.0, 318.0, 214.0,
328.0, 341.0, '352', '284 ers', '3371,175', '400.0', '430', '359',
'284', '430.0', '257.0', '400', '257', '359re', '430re', '346',
'359 re', '2,055', 530.0, 335.0, 560.0, 465.0, 505.0, 460.0, 605.0,
535.0, '358ers', '484', '358', '420', '390 re', '281.0', '480.0',
'450', '326.0', '440', '470/1290', 37.0, 500, 200.0, 272.0, 210.0,
74.0, 354.0, 506.0, 363.0, 420.0, 235.0, 311.0, 433.0, 392.0,
540.0, 110.0, 616.0, 740.0, '520l', '454', '259', '259 l', '243.0',
'384', '510.0', '190.0', '384.0', '190', '259.0', '580', '530ers',
'173', 825.0, 520.0, 550.0, 285.0, 480.0, '587.0', '155 l', '211',
'160', '211rs', '1,050', '420.0', '500.0', 336, 400, 315, '132',
'446lts', '132 l', '598', '405lts', '275', '405', '446 lts', '645',
'458', '150', '125', '625', '770', '772', '745', 405, 84, 279, 521,
270, 385, 600, 625, 425.0, 350.0, 366.0, 345.0, 242.0, 73.0, 316.0,
205.0, 419.0, 300.0, '385', '521', '494', '615s', '300', '495lts',
'414'], dtype=object)
In [238]:
def preprocess_Boot_Space(text):
text=str(text)
if text=="nan":
return np.nan
lists=text.split("/")
if len(lists)==1:
text=text.replace(",","")
text=text.replace("".join(re.findall('[a-zA-Z]',text)),"")
if text[len(text)-1]==" ":
text=text[:len(text)-1]
text=float(text)
else:
f=float(lists[0])
s=float(lists[1])
text=(f+s)/2
return text
In [239]:
preprocess_Boot_Space('480.0')
Out[239]:
480.0
In [240]:
main["Boot Space (Litres)"]=main["Boot Space (Litres)"].apply(preprocess_Boot_Space)
In [739]:
main["Boot Space (Litres)"]
Out[739]:
0 378.0
1 NaN
2 375.0
3 378.0
4 NaN
...
160026 414.0
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: Boot Space (Litres), Length: 160031, dtype: float64
In [740]:
main.columns[43]
Out[740]:
'Power Windows-Front'
In [741]:
main["Power Windows-Front"]
Out[741]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Power Windows-Front, Length: 160031, dtype: object
In [742]:
main["Power Windows-Front"].unique()
Out[742]:
array(['YES', nan, 'NO', '5', '4', '2', '1890', '2923', '506mm', '3200',
'348', '2765'], dtype=object)
In [743]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows-Front"])
Out[743]:
<AxesSubplot:xlabel='Power Windows-Front', ylabel='count'>
In [744]:
main["Power Windows-Front"]=main["Power Windows-Front"].apply(lambda x:preprocess_to_null_out(x,False))
In [745]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows-Front"])
Out[745]:
<AxesSubplot:xlabel='Power Windows-Front', ylabel='count'>
In [746]:
main.columns[44]
Out[746]:
'Power Windows-Rear'
In [747]:
main["Power Windows-Rear"]
Out[747]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Power Windows-Rear, Length: 160031, dtype: object
In [748]:
main["Power Windows-Rear"].unique()
Out[748]:
array(['YES', nan, 'NO', '5', '4', '1890', '2923', '2', '506mm', '348',
'2765'], dtype=object)
In [749]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows-Rear"])
Out[749]:
<AxesSubplot:xlabel='Power Windows-Rear', ylabel='count'>
In [750]:
main["Power Windows-Rear"]=main["Power Windows-Rear"].apply(lambda x:preprocess_to_null_out(x,False))
In [751]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Windows-Rear"])
Out[751]:
<AxesSubplot:xlabel='Power Windows-Rear', ylabel='count'>
In [752]:
main.columns[45]
Out[752]:
'Heater'
In [753]:
main["Heater"]
Out[753]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Heater, Length: 160031, dtype: object
In [754]:
main["Heater"].unique()
Out[754]:
array(['YES', nan, 'NO', '5', '4', '2', '2923', '506mm', '348'],
dtype=object)
In [755]:
main["Heater"]=main["Heater"].apply(lambda x:preprocess_to_null_out(x,False))
In [756]:
plt.figure(figsize=(15,5))
sns.countplot(main["Heater"])
Out[756]:
<AxesSubplot:xlabel='Heater', ylabel='count'>
In [757]:
main.columns[46]
Out[757]:
'Adjustable Steering'
In [758]:
main["Adjustable Steering"]
Out[758]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Adjustable Steering, Length: 160031, dtype: object
In [759]:
main["Adjustable Steering"].unique()
Out[759]:
array(['YES', nan, 'NO', '5', '4', '2', '2923', '348'], dtype=object)
In [760]:
main["Adjustable Steering"]=main["Adjustable Steering"].apply(lambda x:preprocess_to_null_out(x,False))
In [761]:
plt.figure(figsize=(15,5))
sns.countplot(main["Adjustable Steering"])
Out[761]:
<AxesSubplot:xlabel='Adjustable Steering', ylabel='count'>
In [762]:
main.columns[52]
Out[762]:
'Rear Seat Centre Arm Rest'
In [763]:
main["Rear Seat Centre Arm Rest"].unique()
Out[763]:
array(['YES', nan, 'NO', '5', '4', '2 Zone'], dtype=object)
In [764]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Seat Centre Arm Rest"])
Out[764]:
<AxesSubplot:xlabel='Rear Seat Centre Arm Rest', ylabel='count'>
In [765]:
main["Rear Seat Centre Arm Rest"]=main["Rear Seat Centre Arm Rest"].apply(lambda x:preprocess_to_null_out(x,False))
In [766]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Seat Centre Arm Rest"])
Out[766]:
<AxesSubplot:xlabel='Rear Seat Centre Arm Rest', ylabel='count'>
In [767]:
main.columns[53]
Out[767]:
'Height Adjustable Front Seat Belts'
In [768]:
main["Height Adjustable Front Seat Belts"].unique()
Out[768]:
array([nan, 'YES', 'NO', '1600.0', '1600', '5', '4'], dtype=object)
In [769]:
plt.figure(figsize=(15,5))
sns.countplot(main["Height Adjustable Front Seat Belts"])
Out[769]:
<AxesSubplot:xlabel='Height Adjustable Front Seat Belts', ylabel='count'>
In [770]:
main["Height Adjustable Front Seat Belts"]=main["Height Adjustable Front Seat Belts"].apply(lambda x:preprocess_to_null_out(x,False))
In [771]:
plt.figure(figsize=(15,5))
sns.countplot(main["Height Adjustable Front Seat Belts"])
Out[771]:
<AxesSubplot:xlabel='Height Adjustable Front Seat Belts', ylabel='count'>
In [772]:
main.columns[55]
Out[772]:
'Rear AC Vents'
In [773]:
main["Rear AC Vents"]
Out[773]:
0 YES
1 NaN
2 NaN
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Rear AC Vents, Length: 160031, dtype: object
In [774]:
main["Rear AC Vents"].unique()
Out[774]:
array(['YES', nan, 'NO'], dtype=object)
In [775]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear AC Vents"])
Out[775]:
<AxesSubplot:xlabel='Rear AC Vents', ylabel='count'>
In [776]:
main.columns[56]
Out[776]:
'Seat Lumbar Support'
In [777]:
main["Seat Lumbar Support"]
Out[777]:
0 NaN
1 NaN
2 YES
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Seat Lumbar Support, Length: 160031, dtype: object
In [778]:
main["Seat Lumbar Support"].unique()
Out[778]:
array([nan, 'YES', 'NO'], dtype=object)
In [779]:
plt.figure(figsize=(15,5))
sns.countplot(main["Seat Lumbar Support"])
Out[779]:
<AxesSubplot:xlabel='Seat Lumbar Support', ylabel='count'>
In [780]:
main.columns[57]
Out[780]:
'Cruise Control'
In [781]:
main["Cruise Control"]
Out[781]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Cruise Control, Length: 160031, dtype: object
In [782]:
main["Cruise Control"].unique()
Out[782]:
array(['YES', nan, 'NO'], dtype=object)
In [783]:
plt.figure(figsize=(15,5))
sns.countplot(main["Cruise Control"])
Out[783]:
<AxesSubplot:xlabel='Cruise Control', ylabel='count'>
In [784]:
main.columns[58]
Out[784]:
'Smart Access Card Entry'
In [785]:
main["Smart Access Card Entry"]
Out[785]:
0 YES
1 NaN
2 NaN
3 YES
4 NaN
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Smart Access Card Entry, Length: 160031, dtype: object
In [786]:
main["Smart Access Card Entry"].unique()
Out[786]:
array(['YES', nan, 'NO'], dtype=object)
In [787]:
plt.figure(figsize=(15,5))
sns.countplot(main["Smart Access Card Entry"])
Out[787]:
<AxesSubplot:xlabel='Smart Access Card Entry', ylabel='count'>
In [788]:
main.columns[59]
Out[788]:
'KeyLess Entry'
In [789]:
main["KeyLess Entry"].unique()
Out[789]:
array(['YES', nan, 'NO'], dtype=object)
In [790]:
plt.figure(figsize=(15,5))
sns.countplot(main["KeyLess Entry"])
Out[790]:
<AxesSubplot:xlabel='KeyLess Entry', ylabel='count'>
In [791]:
main.columns[60]
Out[791]:
'Engine Start/Stop Button'
In [792]:
main["Engine Start/Stop Button"]
Out[792]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Engine Start/Stop Button, Length: 160031, dtype: object
In [793]:
main["Engine Start/Stop Button"].unique()
Out[793]:
array(['YES', nan, 'NO'], dtype=object)
In [794]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Start/Stop Button"])
Out[794]:
<AxesSubplot:xlabel='Engine Start/Stop Button', ylabel='count'>
In [795]:
main.columns[61]
Out[795]:
'Glove Box Cooling'
In [796]:
main["Glove Box Cooling"]
Out[796]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Glove Box Cooling, Length: 160031, dtype: object
In [797]:
main["Glove Box Cooling"].unique()
Out[797]:
array([nan, 'YES', 'NO'], dtype=object)
In [798]:
plt.figure(figsize=(15,5))
sns.countplot(main["Glove Box Cooling"])
Out[798]:
<AxesSubplot:xlabel='Glove Box Cooling', ylabel='count'>
In [799]:
main.columns[62]
Out[799]:
'Voice Control'
In [800]:
main["Voice Control"]
Out[800]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Voice Control, Length: 160031, dtype: object
In [801]:
main["Voice Control"].unique()
Out[801]:
array(['YES', nan, 'NO'], dtype=object)
In [802]:
plt.figure(figsize=(15,5))
sns.countplot(main["Voice Control"])
Out[802]:
<AxesSubplot:xlabel='Voice Control', ylabel='count'>
In [803]:
main.columns[63]
Out[803]:
'Gear Shift Indicator'
In [804]:
main["Gear Shift Indicator"]
Out[804]:
0 YES
1 NaN
2 NaN
3 NO
4 NO
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Gear Shift Indicator, Length: 160031, dtype: object
In [805]:
main["Gear Shift Indicator"].unique()
Out[805]:
array(['YES', nan, 'NO', '7'], dtype=object)
In [806]:
plt.figure(figsize=(15,5))
sns.countplot(main["Gear Shift Indicator"])
Out[806]:
<AxesSubplot:xlabel='Gear Shift Indicator', ylabel='count'>
In [807]:
main["Gear Shift Indicator"]=main["Gear Shift Indicator"].apply(lambda x:preprocess_to_null_out(x,False))
In [808]:
plt.figure(figsize=(15,5))
sns.countplot(main["Gear Shift Indicator"])
Out[808]:
<AxesSubplot:xlabel='Gear Shift Indicator', ylabel='count'>
In [809]:
main.columns[64]
Out[809]:
'Tachometer'
In [810]:
main["Tachometer"]
Out[810]:
0 YES
1 NaN
2 NaN
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Tachometer, Length: 160031, dtype: object
In [811]:
main["Tachometer"].unique()
Out[811]:
array(['YES', nan, 'NO', '2'], dtype=object)
In [812]:
plt.figure(figsize=(15,5))
sns.countplot(main["Tachometer"])
Out[812]:
<AxesSubplot:xlabel='Tachometer', ylabel='count'>
In [813]:
main["Tachometer"]=main["Tachometer"].apply(lambda x:preprocess_to_null_out(x,False))
In [814]:
plt.figure(figsize=(15,5))
sns.countplot(main["Tachometer"])
Out[814]:
<AxesSubplot:xlabel='Tachometer', ylabel='count'>
In [815]:
main.columns[65]
Out[815]:
'Electronic Multi-Tripmeter'
In [816]:
main["Electronic Multi-Tripmeter"]
Out[816]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Electronic Multi-Tripmeter, Length: 160031, dtype: object
In [817]:
main["Electronic Multi-Tripmeter"].unique()
Out[817]:
array(['YES', nan, 'NO'], dtype=object)
In [818]:
plt.figure(figsize=(15,5))
sns.countplot(main["Electronic Multi-Tripmeter"])
Out[818]:
<AxesSubplot:xlabel='Electronic Multi-Tripmeter', ylabel='count'>
In [819]:
main.columns[[66]]
Out[819]:
Index(['Fabric Upholstery'], dtype='object')
In [820]:
main["Fabric Upholstery"]
Out[820]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Fabric Upholstery, Length: 160031, dtype: object
In [821]:
main["Fabric Upholstery"].unique()
Out[821]:
array(['YES', nan, 'NO'], dtype=object)
In [822]:
plt.figure(figsize=(15,5))
sns.countplot(main["Fabric Upholstery"])
Out[822]:
<AxesSubplot:xlabel='Fabric Upholstery', ylabel='count'>
In [823]:
main.columns[67]
Out[823]:
'Leather Steering Wheel'
In [824]:
main["Leather Steering Wheel"]
Out[824]:
0 YES
1 NaN
2 NO
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Leather Steering Wheel, Length: 160031, dtype: object
In [825]:
main["Leather Steering Wheel"].unique()
Out[825]:
array(['YES', nan, 'NO', '2'], dtype=object)
In [826]:
plt.figure(figsize=(15,5))
sns.countplot(main["Leather Steering Wheel"])
Out[826]:
<AxesSubplot:xlabel='Leather Steering Wheel', ylabel='count'>
In [827]:
main["Leather Steering Wheel"]=main["Leather Steering Wheel"].apply(lambda x:preprocess_to_null_out(x,False))
In [828]:
plt.figure(figsize=(15,5))
sns.countplot(main["Leather Steering Wheel"])
Out[828]:
<AxesSubplot:xlabel='Leather Steering Wheel', ylabel='count'>
In [829]:
main.columns[68]
Out[829]:
'Glove Compartment'
In [830]:
main["Glove Compartment"]
Out[830]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Glove Compartment, Length: 160031, dtype: object
In [831]:
main["Glove Compartment"].unique()
Out[831]:
array(['YES', nan, 'NO', '2'], dtype=object)
In [832]:
plt.figure(figsize=(15,5))
sns.countplot(main["Glove Compartment"])
Out[832]:
<AxesSubplot:xlabel='Glove Compartment', ylabel='count'>
In [833]:
main["Glove Compartment"]=main["Glove Compartment"].apply(lambda x:preprocess_to_null_out(x,False))
In [834]:
plt.figure(figsize=(15,5))
sns.countplot(main["Glove Compartment"])
Out[834]:
<AxesSubplot:xlabel='Glove Compartment', ylabel='count'>
In [835]:
main.columns[69]
Out[835]:
'Digital Clock'
In [836]:
main["Digital Clock"]
Out[836]:
0 YES
1 NaN
2 NaN
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Digital Clock, Length: 160031, dtype: object
In [837]:
main["Digital Clock"].unique()
Out[837]:
array(['YES', nan, 'NO'], dtype=object)
In [838]:
plt.figure(figsize=(15,5))
sns.countplot(main["Digital Clock"])
Out[838]:
<AxesSubplot:xlabel='Digital Clock', ylabel='count'>
In [839]:
main.columns[70]
Out[839]:
'Digital Odometer'
In [840]:
main["Digital Odometer"]
Out[840]:
0 NaN
1 YES
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Digital Odometer, Length: 160031, dtype: object
In [841]:
main["Digital Odometer"].unique()
Out[841]:
array([nan, 'YES', 'NO'], dtype=object)
In [842]:
plt.figure(figsize=(15,5))
sns.countplot(main["Digital Odometer"])
Out[842]:
<AxesSubplot:xlabel='Digital Odometer', ylabel='count'>
In [843]:
main.columns[71]
Out[843]:
'Height Adjustable Driver Seat'
In [844]:
main["Height Adjustable Driver Seat"]
Out[844]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Height Adjustable Driver Seat, Length: 160031, dtype: object
In [845]:
main["Height Adjustable Driver Seat"].unique()
Out[845]:
array(['YES', nan, 'NO'], dtype=object)
In [846]:
plt.figure(figsize=(15,5))
sns.countplot(main["Height Adjustable Driver Seat"])
Out[846]:
<AxesSubplot:xlabel='Height Adjustable Driver Seat', ylabel='count'>
In [847]:
main.columns[72]
Out[847]:
'Dual Tone Dashboard'
In [848]:
main["Dual Tone Dashboard"]
Out[848]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Dual Tone Dashboard, Length: 160031, dtype: object
In [849]:
main["Dual Tone Dashboard"].unique()
Out[849]:
array(['YES', nan, 'NO'], dtype=object)
In [850]:
plt.figure(figsize=(15,5))
sns.countplot(main["Dual Tone Dashboard"])
Out[850]:
<AxesSubplot:xlabel='Dual Tone Dashboard', ylabel='count'>
In [851]:
main.columns[73]
Out[851]:
'Adjustable Headlights'
In [852]:
main["Adjustable Headlights"]
Out[852]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Adjustable Headlights, Length: 160031, dtype: object
In [853]:
main["Adjustable Headlights"].unique()
Out[853]:
array(['YES', nan, 'NO'], dtype=object)
In [854]:
plt.figure(figsize=(15,5))
sns.countplot(main["Adjustable Headlights"])
Out[854]:
<AxesSubplot:xlabel='Adjustable Headlights', ylabel='count'>
In [855]:
main.columns[74]
Out[855]:
'Electric Folding Rear View Mirror'
In [856]:
main["Electric Folding Rear View Mirror"]
Out[856]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Electric Folding Rear View Mirror, Length: 160031, dtype: object
In [857]:
main["Electric Folding Rear View Mirror"].unique()
Out[857]:
array(['YES', nan, 'NO'], dtype=object)
In [858]:
plt.figure(figsize=(15,5))
sns.countplot(main["Electric Folding Rear View Mirror"])
Out[858]:
<AxesSubplot:xlabel='Electric Folding Rear View Mirror', ylabel='count'>
In [859]:
main.columns[75]
Out[859]:
'Rear Window Wiper'
In [860]:
main["Rear Window Wiper"]
Out[860]:
0 NaN
1 NaN
2 YES
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Rear Window Wiper, Length: 160031, dtype: object
In [861]:
main["Rear Window Wiper"].unique()
Out[861]:
array([nan, 'YES', 'NO'], dtype=object)
In [862]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Window Wiper"])
Out[862]:
<AxesSubplot:xlabel='Rear Window Wiper', ylabel='count'>
In [863]:
main.columns[76]
Out[863]:
'Rear Window Washer'
In [864]:
main["Rear Window Washer"]
Out[864]:
0 NaN
1 NaN
2 YES
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Rear Window Washer, Length: 160031, dtype: object
In [865]:
main["Rear Window Washer"].unique()
Out[865]:
array([nan, 'YES', 'NO'], dtype=object)
In [866]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Window Washer"])
Out[866]:
<AxesSubplot:xlabel='Rear Window Washer', ylabel='count'>
In [867]:
main.columns[77]
Out[867]:
'Rear Window Defogger'
In [868]:
main["Rear Window Defogger"]
Out[868]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Rear Window Defogger, Length: 160031, dtype: object
In [869]:
main["Rear Window Defogger"].unique()
Out[869]:
array(['YES', nan, 'NO'], dtype=object)
In [870]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Window Defogger"])
Out[870]:
<AxesSubplot:xlabel='Rear Window Defogger', ylabel='count'>
In [871]:
main.columns[78]
Out[871]:
'Rear Spoiler'
In [872]:
main["Rear Spoiler"]
Out[872]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Rear Spoiler, Length: 160031, dtype: object
In [873]:
main["Rear Spoiler"].unique()
Out[873]:
array([nan, 'YES', 'NO'], dtype=object)
In [874]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Spoiler"])
Out[874]:
<AxesSubplot:xlabel='Rear Spoiler', ylabel='count'>
In [875]:
main.columns[79]
Out[875]:
'Sun Roof'
In [876]:
main["Sun Roof"]
Out[876]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Sun Roof, Length: 160031, dtype: object
In [877]:
main["Sun Roof"].unique()
Out[877]:
array([nan, 'YES', 'NO'], dtype=object)
In [878]:
plt.figure(figsize=(15,5))
sns.countplot(main["Sun Roof"])
Out[878]:
<AxesSubplot:xlabel='Sun Roof', ylabel='count'>
In [879]:
main.columns[80]
Out[879]:
'Moon Roof'
In [880]:
main["Moon Roof"]
Out[880]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Moon Roof, Length: 160031, dtype: object
In [881]:
main["Moon Roof"].unique()
Out[881]:
array([nan, 'YES', 'NO'], dtype=object)
In [882]:
plt.figure(figsize=(15,5))
sns.countplot(main["Moon Roof"])
Out[882]:
<AxesSubplot:xlabel='Moon Roof', ylabel='count'>
In [883]:
main.columns[81]
Out[883]:
'Outside Rear View Mirror Turn Indicators'
In [884]:
main["Outside Rear View Mirror Turn Indicators"]
Out[884]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: Outside Rear View Mirror Turn Indicators, Length: 160031, dtype: object
In [885]:
main["Outside Rear View Mirror Turn Indicators"].unique()
Out[885]:
array(['YES', nan, 'NO'], dtype=object)
In [886]:
plt.figure(figsize=(15,5))
sns.countplot(main["Outside Rear View Mirror Turn Indicators"])
Out[886]:
<AxesSubplot:xlabel='Outside Rear View Mirror Turn Indicators', ylabel='count'>
In [887]:
main.columns[82]
Out[887]:
'Intergrated Antenna'
In [888]:
main["Intergrated Antenna"]
Out[888]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Intergrated Antenna, Length: 160031, dtype: object
In [889]:
main["Intergrated Antenna"].unique()
Out[889]:
array([nan, 'YES', 'NO'], dtype=object)
In [890]:
plt.figure(figsize=(15,5))
sns.countplot(main["Intergrated Antenna"])
Out[890]:
<AxesSubplot:xlabel='Intergrated Antenna', ylabel='count'>
In [891]:
main.columns[83]
Out[891]:
'Chrome Grille'
In [892]:
main["Chrome Grille"]
Out[892]:
0 NaN
1 NaN
2 YES
3 NaN
4 NaN
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Chrome Grille, Length: 160031, dtype: object
In [893]:
main["Chrome Grille"].unique()
Out[893]:
array([nan, 'YES', 'NO'], dtype=object)
In [894]:
plt.figure(figsize=(15,5))
sns.countplot(main["Chrome Grille"])
Out[894]:
<AxesSubplot:xlabel='Chrome Grille', ylabel='count'>
In [895]:
main.columns[84]
Out[895]:
'Halogen Headlamps'
In [896]:
main["Halogen Headlamps"]
Out[896]:
0 NO
1 YES
2 YES
3 NO
4 YES
...
160026 NaN
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: Halogen Headlamps, Length: 160031, dtype: object
In [897]:
main["Halogen Headlamps"].unique()
Out[897]:
array(['NO', 'YES', nan], dtype=object)
In [898]:
plt.figure(figsize=(15,5))
sns.countplot(main["Halogen Headlamps"])
Out[898]:
<AxesSubplot:xlabel='Halogen Headlamps', ylabel='count'>
In [899]:
main.columns[85]
Out[899]:
'Roof Rail'
In [900]:
main["Roof Rail"]
Out[900]:
0 NaN
1 NaN
2 YES
3 NaN
4 NaN
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Roof Rail, Length: 160031, dtype: object
In [901]:
main["Roof Rail"].unique()
Out[901]:
array([nan, 'YES', 'NO'], dtype=object)
In [902]:
plt.figure(figsize=(15,5))
sns.countplot(main["Roof Rail"])
Out[902]:
<AxesSubplot:xlabel='Roof Rail', ylabel='count'>
In [903]:
main.columns[86]
Out[903]:
'LED DRLs'
In [904]:
main["LED DRLs"]
Out[904]:
0 YES
1 NaN
2 NO
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: LED DRLs, Length: 160031, dtype: object
In [905]:
main["LED DRLs"].unique()
Out[905]:
array(['YES', nan, 'NO'], dtype=object)
In [906]:
plt.figure(figsize=(15,5))
sns.countplot(main["LED DRLs"])
Out[906]:
<AxesSubplot:xlabel='LED DRLs', ylabel='count'>
In [907]:
main.columns[87]
Out[907]:
'LED Taillights'
In [908]:
main["LED Taillights"]
Out[908]:
0 YES
1 NaN
2 NO
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: LED Taillights, Length: 160031, dtype: object
In [909]:
main["LED Taillights"].unique()
Out[909]:
array(['YES', nan, 'NO'], dtype=object)
In [910]:
plt.figure(figsize=(15,5))
sns.countplot(main["LED Taillights"])
Out[910]:
<AxesSubplot:xlabel='LED Taillights', ylabel='count'>
In [911]:
main.columns[88]
Out[911]:
'Anti-Lock Braking System'
In [912]:
main["Anti-Lock Braking System"]
Out[912]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Anti-Lock Braking System, Length: 160031, dtype: object
In [913]:
main["Anti-Lock Braking System"].unique()
Out[913]:
array(['YES', nan, 'NO', 'Speed Proportional Steering'], dtype=object)
In [914]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti-Lock Braking System"])
Out[914]:
<AxesSubplot:xlabel='Anti-Lock Braking System', ylabel='count'>
In [915]:
main["Anti-Lock Braking System"]=main["Anti-Lock Braking System"].apply(lambda x:preprocess_to_null_out(x,False))
In [916]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti-Lock Braking System"])
Out[916]:
<AxesSubplot:xlabel='Anti-Lock Braking System', ylabel='count'>
In [917]:
main.columns[89]
Out[917]:
'Central Locking'
In [918]:
main["Central Locking"]
Out[918]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Central Locking, Length: 160031, dtype: object
In [919]:
main["Central Locking"].unique()
Out[919]:
array(['YES', nan, 'NO'], dtype=object)
In [920]:
plt.figure(figsize=(15,5))
sns.countplot(main["Central Locking"])
Out[920]:
<AxesSubplot:xlabel='Central Locking', ylabel='count'>
In [921]:
main.columns[90]
Out[921]:
'Power Door Locks'
In [922]:
main["Power Door Locks"]
Out[922]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Power Door Locks, Length: 160031, dtype: object
In [923]:
main["Power Door Locks"].unique()
Out[923]:
array(['YES', nan, 'NO'], dtype=object)
In [924]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Door Locks"])
Out[924]:
<AxesSubplot:xlabel='Power Door Locks', ylabel='count'>
In [925]:
main.columns[91]
Out[925]:
'Child Safety Locks'
In [926]:
main["Child Safety Locks"]
Out[926]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Child Safety Locks, Length: 160031, dtype: object
In [927]:
main["Child Safety Locks"].unique()
Out[927]:
array(['YES', nan, 'NO'], dtype=object)
In [928]:
plt.figure(figsize=(15,5))
sns.countplot(main["Child Safety Locks"])
Out[928]:
<AxesSubplot:xlabel='Child Safety Locks', ylabel='count'>
In [929]:
main.columns[92]
Out[929]:
'Side Airbag-Front'
In [930]:
main["Side Airbag-Front"]
Out[930]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Side Airbag-Front, Length: 160031, dtype: object
In [931]:
main["Side Airbag-Front"].unique()
Out[931]:
array([nan, 'YES', 'NO'], dtype=object)
In [932]:
plt.figure(figsize=(15,5))
sns.countplot(main["Side Airbag-Front"])
Out[932]:
<AxesSubplot:xlabel='Side Airbag-Front', ylabel='count'>
In [933]:
main.columns[93]
Out[933]:
'Day & Night Rear View Mirror'
In [934]:
main["Day & Night Rear View Mirror"]
Out[934]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Day & Night Rear View Mirror, Length: 160031, dtype: object
In [935]:
main["Day & Night Rear View Mirror"].unique()
Out[935]:
array(['YES', nan, 'NO'], dtype=object)
In [936]:
plt.figure(figsize=(15,5))
sns.countplot(main["Day & Night Rear View Mirror"])
Out[936]:
<AxesSubplot:xlabel='Day & Night Rear View Mirror', ylabel='count'>
In [937]:
main.columns[94]
Out[937]:
'Passenger Side Rear View Mirror'
In [938]:
main["Passenger Side Rear View Mirror"]
Out[938]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Passenger Side Rear View Mirror, Length: 160031, dtype: object
In [939]:
main["Passenger Side Rear View Mirror"].unique()
Out[939]:
array(['YES', nan, 'NO'], dtype=object)
In [940]:
plt.figure(figsize=(15,5))
sns.countplot(main["Passenger Side Rear View Mirror"])
Out[940]:
<AxesSubplot:xlabel='Passenger Side Rear View Mirror', ylabel='count'>
In [941]:
main.columns[95]
Out[941]:
'Rear Seat Belts'
In [942]:
main["Rear Seat Belts"]
Out[942]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Rear Seat Belts, Length: 160031, dtype: object
In [943]:
main["Rear Seat Belts"].unique()
Out[943]:
array(['YES', nan, 'NO'], dtype=object)
In [944]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Seat Belts"])
Out[944]:
<AxesSubplot:xlabel='Rear Seat Belts', ylabel='count'>
In [945]:
main.columns[96]
Out[945]:
'Seat Belt Warning'
In [946]:
main["Seat Belt Warning"]
Out[946]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Seat Belt Warning, Length: 160031, dtype: object
In [947]:
main["Seat Belt Warning"].unique()
Out[947]:
array(['YES', nan, 'NO'], dtype=object)
In [948]:
plt.figure(figsize=(15,5))
sns.countplot(main["Seat Belt Warning"])
Out[948]:
<AxesSubplot:xlabel='Seat Belt Warning', ylabel='count'>
In [949]:
main.columns[97]
Out[949]:
'Door Ajar Warning'
In [950]:
main["Door Ajar Warning"]
Out[950]:
0 YES
1 NaN
2 NaN
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Door Ajar Warning, Length: 160031, dtype: object
In [951]:
main["Door Ajar Warning"].unique()
Out[951]:
array(['YES', nan, 'NO'], dtype=object)
In [952]:
plt.figure(figsize=(15,5))
sns.countplot(main["Door Ajar Warning"])
Out[952]:
<AxesSubplot:xlabel='Door Ajar Warning', ylabel='count'>
In [953]:
main.columns[98]
Out[953]:
'Adjustable Seats'
In [954]:
main["Adjustable Seats"]
Out[954]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Adjustable Seats, Length: 160031, dtype: object
In [955]:
main["Adjustable Seats"].unique()
Out[955]:
array(['YES', nan, 'NO'], dtype=object)
In [956]:
plt.figure(figsize=(15,5))
sns.countplot(main["Adjustable Seats"])
Out[956]:
<AxesSubplot:xlabel='Adjustable Seats', ylabel='count'>
In [957]:
main.columns[99]
Out[957]:
'Engine Immobilizer'
In [958]:
main["Engine Immobilizer"]
Out[958]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Engine Immobilizer, Length: 160031, dtype: object
In [959]:
main["Engine Immobilizer"].unique()
Out[959]:
array(['YES', nan, 'NO', 'Tubeless,Radial'], dtype=object)
In [960]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Immobilizer"])
Out[960]:
<AxesSubplot:xlabel='Engine Immobilizer', ylabel='count'>
In [961]:
main["Anti-Lock Braking System"]=main["Anti-Lock Braking System"].apply(lambda x:preprocess_to_null_out(x,False))
In [962]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti-Lock Braking System"])
Out[962]:
<AxesSubplot:xlabel='Anti-Lock Braking System', ylabel='count'>
In [963]:
main["Engine Immobilizer"]=main["Engine Immobilizer"].apply(lambda x:preprocess_to_null_out(x,False))
In [964]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Immobilizer"])
Out[964]:
<AxesSubplot:xlabel='Engine Immobilizer', ylabel='count'>
In [965]:
main.columns[100]
Out[965]:
'Crash Sensor'
In [966]:
main["Crash Sensor"]
Out[966]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 NaN
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: Crash Sensor, Length: 160031, dtype: object
In [967]:
main["Crash Sensor"].unique()
Out[967]:
array(['YES', nan, 'NO'], dtype=object)
In [968]:
plt.figure(figsize=(15,5))
sns.countplot(main["Crash Sensor"])
Out[968]:
<AxesSubplot:xlabel='Crash Sensor', ylabel='count'>
In [969]:
main.columns[101]
Out[969]:
'Engine Check Warning'
In [970]:
main["Engine Check Warning"]
Out[970]:
0 YES
1 NaN
2 YES
3 YES
4 YES
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Engine Check Warning, Length: 160031, dtype: object
In [971]:
main["Engine Check Warning"].unique()
Out[971]:
array(['YES', nan, 'NO'], dtype=object)
In [972]:
plt.figure(figsize=(15,5))
sns.countplot(main["Engine Check Warning"])
Out[972]:
<AxesSubplot:xlabel='Engine Check Warning', ylabel='count'>
In [973]:
main.columns[102]
Out[973]:
'Automatic Headlamps'
In [974]:
main["Automatic Headlamps"]
Out[974]:
0 YES
1 NaN
2 NO
3 YES
4 NaN
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Automatic Headlamps, Length: 160031, dtype: object
In [975]:
main["Automatic Headlamps"].unique()
Out[975]:
array(['YES', nan, 'NO'], dtype=object)
In [976]:
plt.figure(figsize=(15,5))
sns.countplot(main["Automatic Headlamps"])
Out[976]:
<AxesSubplot:xlabel='Automatic Headlamps', ylabel='count'>
In [977]:
main.columns[103]
Out[977]:
'EBD'
In [978]:
main["EBD"]
Out[978]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: EBD, Length: 160031, dtype: object
In [979]:
main["EBD"].unique()
Out[979]:
array(['YES', nan, 'NO'], dtype=object)
In [980]:
plt.figure(figsize=(15,5))
sns.countplot(main["EBD"])
Out[980]:
<AxesSubplot:xlabel='EBD', ylabel='count'>
In [981]:
main.columns[104]
Out[981]:
'Electronic Stability Control'
In [982]:
main["Electronic Stability Control"]
Out[982]:
0 NO
1 NaN
2 NaN
3 YES
4 NO
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Electronic Stability Control, Length: 160031, dtype: object
In [983]:
main["Electronic Stability Control"].unique()
Out[983]:
array(['NO', nan, 'YES'], dtype=object)
In [984]:
plt.figure(figsize=(15,5))
sns.countplot(main["Electronic Stability Control"])
Out[984]:
<AxesSubplot:xlabel='Electronic Stability Control', ylabel='count'>
In [985]:
main.columns[105]
Out[985]:
'Follow Me Home Headlamps'
In [986]:
main["Follow Me Home Headlamps"]
Out[986]:
0 YES
1 NaN
2 NaN
3 YES
4 NaN
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Follow Me Home Headlamps, Length: 160031, dtype: object
In [987]:
main["Follow Me Home Headlamps"].unique()
Out[987]:
array(['YES', nan, 'NO'], dtype=object)
In [988]:
plt.figure(figsize=(15,5))
sns.countplot(main["Follow Me Home Headlamps"])
Out[988]:
<AxesSubplot:xlabel='Follow Me Home Headlamps', ylabel='count'>
In [989]:
main.columns[106]
Out[989]:
'Rear Camera'
In [990]:
main["Rear Camera"]
Out[990]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Rear Camera, Length: 160031, dtype: object
In [991]:
main["Rear Camera"].unique()
Out[991]:
array(['YES', nan, 'NO'], dtype=object)
In [992]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Camera"])
Out[992]:
<AxesSubplot:xlabel='Rear Camera', ylabel='count'>
In [993]:
main.columns[107]
Out[993]:
'ISOFIX Child Seat Mounts'
In [994]:
main["ISOFIX Child Seat Mounts"]
Out[994]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: ISOFIX Child Seat Mounts, Length: 160031, dtype: object
In [995]:
main["ISOFIX Child Seat Mounts"].unique()
Out[995]:
array(['YES', nan, 'NO'], dtype=object)
In [996]:
plt.figure(figsize=(15,5))
sns.countplot(main["ISOFIX Child Seat Mounts"])
Out[996]:
<AxesSubplot:xlabel='ISOFIX Child Seat Mounts', ylabel='count'>
In [997]:
main.columns[109]
Out[997]:
'Hill Assist'
In [998]:
main["Hill Assist"]
Out[998]:
0 NO
1 NaN
2 NO
3 YES
4 NO
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Hill Assist, Length: 160031, dtype: object
In [999]:
main["Hill Assist"].unique()
Out[999]:
array(['NO', nan, 'YES'], dtype=object)
In [1000]:
plt.figure(figsize=(15,5))
sns.countplot(main["Hill Assist"])
Out[1000]:
<AxesSubplot:xlabel='Hill Assist', ylabel='count'>
In [1001]:
main.columns[110]
Out[1001]:
'Radio'
In [1002]:
main["Radio"]
Out[1002]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Radio, Length: 160031, dtype: object
In [1003]:
main["Radio"].unique()
Out[1003]:
array(['YES', nan, 'NO'], dtype=object)
In [1004]:
plt.figure(figsize=(15,5))
sns.countplot(main["Radio"])
Out[1004]:
<AxesSubplot:xlabel='Radio', ylabel='count'>
In [1005]:
main.columns[111]
Out[1005]:
'Audio System Remote Control'
In [1006]:
main["Audio System Remote Control"]
Out[1006]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Audio System Remote Control, Length: 160031, dtype: object
In [1007]:
main["Audio System Remote Control"].unique()
Out[1007]:
array(['YES', nan, 'NO'], dtype=object)
In [1008]:
plt.figure(figsize=(15,5))
sns.countplot(main["Audio System Remote Control"])
Out[1008]:
<AxesSubplot:xlabel='Audio System Remote Control', ylabel='count'>
In [1009]:
main.columns[112]
Out[1009]:
'Speakers Front'
In [1010]:
main["Speakers Front"]
Out[1010]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Speakers Front, Length: 160031, dtype: object
In [1011]:
main["Speakers Front"].unique()
Out[1011]:
array(['YES', nan, 'NO'], dtype=object)
In [1012]:
plt.figure(figsize=(15,5))
sns.countplot(main["Speakers Front"])
Out[1012]:
<AxesSubplot:xlabel='Speakers Front', ylabel='count'>
In [1013]:
main.columns[113]
Out[1013]:
'Speakers Rear'
In [1014]:
main["Speakers Rear"]
Out[1014]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Speakers Rear, Length: 160031, dtype: object
In [1015]:
main["Speakers Rear"].unique()
Out[1015]:
array(['YES', nan, 'NO'], dtype=object)
In [1016]:
plt.figure(figsize=(15,5))
sns.countplot(main["Speakers Rear"])
Out[1016]:
<AxesSubplot:xlabel='Speakers Rear', ylabel='count'>
In [1017]:
main.columns[114]
Out[1017]:
'Integrated 2DIN Audio'
In [1018]:
main["Integrated 2DIN Audio"]
Out[1018]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Integrated 2DIN Audio, Length: 160031, dtype: object
In [1019]:
main["Integrated 2DIN Audio"].unique()
Out[1019]:
array(['YES', nan, 'NO'], dtype=object)
In [1020]:
plt.figure(figsize=(15,5))
sns.countplot(main["Integrated 2DIN Audio"])
Out[1020]:
<AxesSubplot:xlabel='Integrated 2DIN Audio', ylabel='count'>
In [1021]:
main.columns[115]
Out[1021]:
'USB & Auxiliary input'
In [1022]:
main["USB & Auxiliary input"]
Out[1022]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: USB & Auxiliary input, Length: 160031, dtype: object
In [1023]:
main["USB & Auxiliary input"].unique()
Out[1023]:
array(['YES', nan, 'NO'], dtype=object)
In [1024]:
plt.figure(figsize=(15,5))
sns.countplot(main["USB & Auxiliary input"])
Out[1024]:
<AxesSubplot:xlabel='USB & Auxiliary input', ylabel='count'>
In [1025]:
main.columns[116]
Out[1025]:
'Bluetooth Connectivity'
In [1026]:
main["Bluetooth Connectivity"]
Out[1026]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Bluetooth Connectivity, Length: 160031, dtype: object
In [1027]:
main["Bluetooth Connectivity"].unique()
Out[1027]:
array(['YES', nan, 'NO'], dtype=object)
In [1028]:
plt.figure(figsize=(15,5))
sns.countplot(main["Bluetooth Connectivity"])
Out[1028]:
<AxesSubplot:xlabel='Bluetooth Connectivity', ylabel='count'>
In [1029]:
main.columns[117]
Out[1029]:
'Android Auto'
In [1030]:
main["Android Auto"]
Out[1030]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Android Auto, Length: 160031, dtype: object
In [1031]:
main["Android Auto"].unique()
Out[1031]:
array(['YES', nan, 'NO'], dtype=object)
In [1032]:
plt.figure(figsize=(15,5))
sns.countplot(main["Android Auto"])
Out[1032]:
<AxesSubplot:xlabel='Android Auto', ylabel='count'>
In [1033]:
main.columns[118]
Out[1033]:
'Apple CarPlay'
In [1034]:
main["Apple CarPlay"]
Out[1034]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Apple CarPlay, Length: 160031, dtype: object
In [1035]:
main["Apple CarPlay"].unique()
Out[1035]:
array(['YES', nan, 'NO'], dtype=object)
In [1036]:
plt.figure(figsize=(15,5))
sns.countplot(main["Apple CarPlay"])
Out[1036]:
<AxesSubplot:xlabel='Apple CarPlay', ylabel='count'>
In [1037]:
main.columns[119]
Out[1037]:
'Place'
In [1038]:
main["Place"]
Out[1038]:
0 On-Road Price in Amritsar :
1 On-Road Price in Kolkata :
2 On-Road Price in Surat :
3 On-Road Price in Thane :
4 On-Road Price in Navi Mumbai :
...
160026 On-Road Price in Gurgaon : (Not Available in F...
160027 On-Road Price in New Delhi : (Not Available in...
160028 On-Road Price in Kolkata : (Not Available in D...
160029 On-Road Price in New Delhi :
160030 On-Road Price in New Delhi : (Not Available in...
Name: Place, Length: 160031, dtype: object
In [1039]:
main["Place"].unique()
Out[1039]:
array(['On-Road Price in Amritsar : ', 'On-Road Price in Kolkata : ',
'On-Road Price in Surat : ', 'On-Road Price in Thane : ',
'On-Road Price in Navi Mumbai : ', 'On-Road Price in Jaipur : ',
'On-Road Price in Ludhiana : ', 'On-Road Price in Madurai :',
'On-Road Price in Jabalpur : ', 'On-Road Price in Chennai : ',
'On-Road Price in Ahmedabad : ', 'On-Road Price in Hyderabad :',
'On-Road Price in Varanasi : ', 'On-Road Price in Bangalore :',
'On-Road Price in Panvel : (Not Available in Navi Mumbai)',
'On-Road Price in Meerut : ', 'On-Road Price in Srinagar :',
'On-Road Price in Chandigarh :',
'On-Road Price in New Delhi : (Not Available in Jodhpur)',
'On-Road Price in Vijayawada :', 'On-Road Price in Gwalior : ',
'On-Road Price in Kanpur : ', 'On-Road Price in Kota : ',
'On-Road Price in Ghaziabad :', 'On-Road Price in Howrah : ',
'On-Road Price in Mysore : ', 'On-Road Price in Srinagar : ',
'On-Road Price in Raipur : ', 'On-Road Price in Dhanbad : ',
'On-Road Price in Patna : ', 'On-Road Price in Mysore :',
'On-Road Price in Hyderabad : ', 'On-Road Price in Jodhpur : ',
'On-Road Price in Solapur : ',
'On-Road Price in Pune : (Not Available in Pimpri chinchwad)',
'On-Road Price in Gwalior :', 'On-Road Price in Coimbatore : ',
'On-Road Price in New Delhi : (Not Available in Navi Mumbai)',
'On-Road Price in Bhopal : ', 'On-Road Price in Guwahati : ',
'On-Road Price in New Delhi : (Not Available in Solapur)',
'On-Road Price in Faridabad : ', 'On-Road Price in Jaipur :',
'On-Road Price in Faridabad :', 'On-Road Price in Nagpur : ',
'On-Road Price in Pune :(Not Available in Pimpri chinchwad)',
'On-Road Price in Indore : ', 'On-Road Price in Mumbai : ',
'On-Road Price in Raipur :', 'On-Road Price in Vijayawada : ',
'On-Road Price in Chandigarh : ', 'On-Road Price in Aurangabad : ',
'On-Road Price in Agra : ', 'On-Road Price in Lucknow : ',
'On-Road Price in Surat :', 'On-Road Price in New Delhi : ',
'On-Road Price in New Delhi : (Not Available in Rajkot)',
'On-Road Price in Madurai : ', 'On-Road Price in Pune : ',
'On-Road Price in Ghaziabad : ',
'On-Road Price in Visakhapatnam : ',
'On-Road Price in Bangalore : ', 'On-Road Price in Vadodara : ',
'On-Road Price in Allahabad : ',
'On-Road Price in New Delhi : (Not Available in Surat)',
'On-Road Price in Navi Mumbai :', 'On-Road Price in Nashik : ',
'On-Road Price in Amritsar :', 'On-Road Price in Nashik :',
'On-Road Price in Aurangabad :', 'On-Road Price in Howrah :',
'On-Road Price in Agra :', 'On-Road Price in Pune :',
'On-Road Price in Rajkot : ', 'On-Road Price in Ranchi :',
'On-Road Price in Ranchi : ', 'On-Road Price in Nagpur :',
'On-Road Price in Kota :', 'On-Road Price in Solapur :',
'On-Road Price in New Delhi :',
'On-Road Price in Kharghar : (Not Available in Navi Mumbai)',
'On-Road Price in Jabalpur :', 'On-Road Price in Patna :',
'On-Road Price in Coimbatore :', 'On-Road Price in Chennai :',
'On-Road Price in New Delhi : (Not Available in Vadodara)',
'On-Road Price in Vadodara :',
'On-Road Price in New Delhi : (Not Available in Aurangabad)',
'On-Road Price in New Delhi : (Not Available in Bhopal)',
'On-Road Price in Dhanbad :', 'On-Road Price in Bhopal :',
'On-Road Price in New Delhi : (Not Available in Chennai)',
'On-Road Price in Allahabad :', 'On-Road Price in Lucknow :',
'On-Road Price in New Delhi : (Not Available in Ranchi)',
'On-Road Price in New Delhi : (Not Available in Srinagar)',
'On-Road Price in Guwahati :', 'On-Road Price in Ahmedabad :',
'On-Road Price in New Delhi : (Not Available in Gwalior)',
'On-Road Price in Rajkot :', 'On-Road Price in Kanpur :',
'On-Road Price in New Delhi : (Not Available in Pune)',
'On-Road Price in New Delhi : (Not Available in Meerut)',
'On-Road Price in New Delhi : (Not Available in Howrah)',
'On-Road Price in Mumbai :', 'On-Road Price in Meerut :',
'On-Road Price in New Delhi : (Not Available in Lucknow)',
'On-Road Price in Varanasi :',
'On-Road Price in New Delhi : (Not Available in Varanasi)',
'On-Road Price in New Delhi : (Not Available in Kota)',
'On-Road Price in New Delhi : (Not Available in Ludhiana)',
'On-Road Price in Ludhiana :',
'On-Road Price in New Delhi : (Not Available in Raipur)',
'On-Road Price in Indore :',
'On-Road Price in New Delhi : (Not Available in Bangalore)',
'On-Road Price in New Delhi : (Not Available in Patna)',
'On-Road Price in New Delhi : (Not Available in Dhanbad)',
'On-Road Price in New Delhi : (Not Available in Mysore)',
'On-Road Price in Kolkata : (Not Available in Howrah)',
'On-Road Price in Visakhapatnam :',
'On-Road Price in New Delhi : (Not Available in Mumbai)',
'On-Road Price in New Delhi : (Not Available in Ahmedabad)',
'On-Road Price in New Delhi : (Not Available in Visakhapatnam)',
'On-Road Price in New Delhi : (Not Available in Allahabad)',
'On-Road Price in Thane :',
'On-Road Price in New Delhi : (Not Available in Vijayawada)',
'On-Road Price in New Delhi : (Not Available in Faridabad)',
'On-Road Price in New Delhi : (Not Available in Thane)',
'On-Road Price in New Delhi : (Not Available in Nagpur)',
'On-Road Price in New Delhi : (Not Available in Amritsar)',
'On-Road Price in New Delhi : (Not Available in Ghaziabad)',
'On-Road Price in New Delhi : (Not Available in Coimbatore)',
'On-Road Price in Jodhpur :',
'On-Road Price in New Delhi : (Not Available in Indore)',
'On-Road Price in Kolkata :',
'On-Road Price in New Delhi : (Not Available in Pimpri chinchwad)',
'On-Road Price in New Delhi : (Not Available in Hyderabad)',
'On-Road Price in New Delhi : (Not Available in Nashik)',
'On-Road Price in New Delhi : (Not Available in Kanpur)',
'On-Road Price in New Delhi : (Not Available in Madurai)',
'On-Road Price in New Delhi : (Not Available in Jaipur)',
'On-Road Price in New Delhi : (Not Available in Guwahati)',
'On-Road Price in New Delhi : (Not Available in Jabalpur)',
'On-Road Price in New Delhi : (Not Available in Kolkata)',
'On-Road Price in New Delhi : (Not Available in Agra)',
'On-Road Price in New Delhi : (Not Available in Chandigarh)',
'On-Road Price in Kapurthala : (Not Available in Amritsar)',
'On-Road Price in Pimpri chinchwad : ',
'On-Road Price in Kolhapur : (Not Available in Solapur)',
'On-Road Price in Mumbai : (Not Available in Navi Mumbai)',
'On-Road Price in Allahabad : (Not Available in Varanasi)',
'On-Road Price in Bhilwara : ',
'On-Road Price in Gurgaon : (Not Available in Faridabad)',
'On-Road Price in Bhopal : (Not Available in Gwalior)',
'On-Road Price in Kolkata : (Not Available in Patna)',
'On-Road Price in Udaipur : ',
'On-Road Price in Jamshedpur : (Not Available in Dhanbad)',
'On-Road Price in Jammu : (Not Available in Srinagar)',
'On-Road Price in Tiruchirappalli : ',
'On-Road Price in Bhopal : (Not Available in Jabalpur)',
'On-Road Price in Pune : (Not Available in Solapur)',
'On-Road Price in Guntur : ',
'On-Road Price in Agra : (Not Available in Ghaziabad)',
'On-Road Price in Jamshedpur : ',
'On-Road Price in Bareilly : (Not Available in Meerut)',
'On-Road Price in Bangalore : (Not Available in Mysore)',
'On-Road Price in Ahmedabad : (Not Available in Vadodara)',
'On-Road Price in Udaipur : (Not Available in Jodhpur)',
'On-Road Price in Lucknow : (Not Available in Kanpur)',
'On-Road Price in Ranchi : (Not Available in Dhanbad)',
'On-Road Price in Lucknow : (Not Available in Allahabad)',
'On-Road Price in Lucknow : (Not Available in Agra)',
'On-Road Price in Visakhapatnam : (Not Available in Vijayawada)',
'On-Road Price in Lucknow : (Not Available in Ghaziabad)',
'On-Road Price in Lucknow : (Not Available in Varanasi)',
'On-Road Price in Jaipur : (Not Available in Kota)',
'On-Road Price in Ludhiana : (Not Available in Amritsar)',
'On-Road Price in Nashik : (Not Available in Aurangabad)',
'On-Road Price in Lucknow : (Not Available in Meerut)',
'On-Road Price in Kolkata : (Not Available in Guwahati)',
'On-Road Price in Noida : (Not Available in Ghaziabad)',
'On-Road Price in Indore : (Not Available in Jabalpur)',
'On-Road Price in Navi Mumbai : (Not Available in Nashik)',
'On-Road Price in Mumbai : (Not Available in Thane)',
'On-Road Price in Mangalagiri : (Not Available in Vijayawada)',
'On-Road Price in Indore : (Not Available in Bhopal)',
'On-Road Price in Indore : (Not Available in Gwalior)',
'On-Road Price in Mangalagiri : (Not Available in Visakhapatnam)',
'On-Road Price in Noida : (Not Available in Agra)',
'On-Road Price in Noida : (Not Available in Meerut)',
'On-Road Price in Mumbai : (Not Available in Aurangabad)',
'On-Road Price in Mumbai : (Not Available in Surat)',
'On-Road Price in Chennai : (Not Available in Visakhapatnam)',
'On-Road Price in Chennai : (Not Available in Mysore)',
'On-Road Price in Mumbai : (Not Available in Ahmedabad)',
'On-Road Price in Mumbai : (Not Available in Nashik)',
'On-Road Price in Mumbai : (Not Available in Pune)',
'On-Road Price in Mumbai : (Not Available in Indore)',
'On-Road Price in Mumbai : (Not Available in Nagpur)',
'On-Road Price in Chennai : (Not Available in Raipur)',
'On-Road Price in Chennai : (Not Available in Bangalore)',
'On-Road Price in Mumbai : (Not Available in Pimpri chinchwad)',
'On-Road Price in Chennai : (Not Available in Vijayawada)',
'On-Road Price in Mumbai : (Not Available in Vadodara)',
'On-Road Price in Mumbai : (Not Available in Solapur)',
'On-Road Price in Chennai : (Not Available in Coimbatore)',
'On-Road Price in Chennai : (Not Available in Hyderabad)',
'On-Road Price in Mumbai : (Not Available in Rajkot)',
'On-Road Price in Chennai : (Not Available in Madurai)',
'On-Road Price in Kolkata : (Not Available in Dhanbad)',
'On-Road Price in Barshi : (Not Available in Solapur)',
'On-Road Price in Chandrapur : (Not Available in Nagpur)',
'On-Road Price in Chhindwara : (Not Available in Bhopal)',
'On-Road Price in Vadodara : (Not Available in Surat)',
'On-Road Price in Kolkata : (Not Available in Ranchi)',
'On-Road Price in Chhindwara : (Not Available in Jabalpur)',
'On-Road Price in Chhindwara : (Not Available in Gwalior)',
'On-Road Price in Ahmedabad : (Not Available in Rajkot)',
'On-Road Price in Vijayawada : (Not Available in Visakhapatnam)',
'On-Road Price in Kolkata : (Not Available in Raipur)',
'On-Road Price in Chhindwara : (Not Available in Indore)',
'On-Road Price in Agra : (Not Available in Meerut)',
'On-Road Price in Mumbai : (Not Available in Mysore)',
'On-Road Price in Mumbai : (Not Available in Bangalore)',
'On-Road Price in Mumbai : (Not Available in Coimbatore)',
'On-Road Price in Mumbai : (Not Available in Vijayawada)',
'On-Road Price in Mumbai : (Not Available in Hyderabad)',
'On-Road Price in Mumbai : (Not Available in Madurai)',
'On-Road Price in Mumbai : (Not Available in Chennai)',
'On-Road Price in Mumbai : (Not Available in Visakhapatnam)',
'On-Road Price in Guntur : (Not Available in Vijayawada)',
'On-Road Price in Rajahmundry : ',
'On-Road Price in Pune : (Not Available in Aurangabad)',
'On-Road Price in Sant Kabir Nagar : (Not Available in Varanasi)',
'On-Road Price in North 24 Parganas : (Not Available in Dhanbad)',
'On-Road Price in North 24 Parganas : (Not Available in Ranchi)',
'On-Road Price in Jalandhar : (Not Available in Amritsar)',
'On-Road Price in North 24 Parganas : (Not Available in Howrah)',
'On-Road Price in North 24 Parganas : (Not Available in Patna)',
'On-Road Price in North 24 Parganas : ',
'On-Road Price in Ajmer : (Not Available in Jodhpur)',
'On-Road Price in Jaipur : (Not Available in Jodhpur)',
'On-Road Price in Satara : (Not Available in Solapur)',
'On-Road Price in Guna : (Not Available in Gwalior)',
'On-Road Price in Aurangabad : (Not Available in Nagpur)',
'On-Road Price in Coimbatore : (Not Available in Madurai)',
'On-Road Price in Guntur : (Not Available in Visakhapatnam)',
'On-Road Price in Jamshedpur : (Not Available in Ranchi)',
'On-Road Price in Ghaziabad : (Not Available in Meerut)',
'On-Road Price in Krishna : ',
'On-Road Price in Krishna : (Not Available in Vijayawada)',
'On-Road Price in Anantnag : (Not Available in Srinagar)',
'On-Road Price in Ahmedabad : (Not Available in Surat)',
'On-Road Price in Pune : (Not Available in Nagpur)',
'On-Road Price in Gurgaon : ',
'On-Road Price in Varanasi : (Not Available in Allahabad)',
'On-Road Price in Jalandhar : ',
'On-Road Price in Krishna : (Not Available in Visakhapatnam)',
'On-Road Price in Rohtak : (Not Available in Faridabad)',
'On-Road Price in Kolkata : (Not Available in Allahabad)',
'On-Road Price in Rohtak : ',
'On-Road Price in Thane : (Not Available in Nashik)',
'On-Road Price in Thane : (Not Available in Aurangabad)',
'On-Road Price in Muzaffarpur : ',
'On-Road Price in Panvel : (Not Available in Pune)',
'On-Road Price in Panvel : (Not Available in Pimpri chinchwad)',
'On-Road Price in Etawah : (Not Available in Agra)',
'On-Road Price in Nagaon : ',
'On-Road Price in Thane : (Not Available in Solapur)',
'On-Road Price in Hazaribagh : ',
'On-Road Price in Thane : (Not Available in Pimpri chinchwad)',
'On-Road Price in Panvel : ',
'On-Road Price in Thane : (Not Available in Nagpur)',
'On-Road Price in Jammu : ',
'On-Road Price in Kolkata : (Not Available in Visakhapatnam)',
'On-Road Price in Kolkata : (Not Available in Varanasi)',
'On-Road Price in Sangli : (Not Available in Solapur)',
'On-Road Price in Anand : (Not Available in Ahmedabad)',
'On-Road Price in Ballabhgarh : (Not Available in Faridabad)'],
dtype=object)
In [1040]:
def preprocess_Place(text):
text=text.split(":")[0]
text=text.replace("On-Road Price in ","")
if text[len(text)-1]==" ":
text=text[:len(text)-1]
text=text.title()
return text
In [1041]:
preprocess_Place("On-Road Price in Thane : (Not Available in Solapur)")
Out[1041]:
'Thane'
In [1042]:
main["Place"]=main["Place"].apply(preprocess_Place)
In [1043]:
main["Place"].unique()
Out[1043]:
array(['Amritsar', 'Kolkata', 'Surat', 'Thane', 'Navi Mumbai', 'Jaipur',
'Ludhiana', 'Madurai', 'Jabalpur', 'Chennai', 'Ahmedabad',
'Hyderabad', 'Varanasi', 'Bangalore', 'Panvel', 'Meerut',
'Srinagar', 'Chandigarh', 'New Delhi', 'Vijayawada', 'Gwalior',
'Kanpur', 'Kota', 'Ghaziabad', 'Howrah', 'Mysore', 'Raipur',
'Dhanbad', 'Patna', 'Jodhpur', 'Solapur', 'Pune', 'Coimbatore',
'Bhopal', 'Guwahati', 'Faridabad', 'Nagpur', 'Indore', 'Mumbai',
'Aurangabad', 'Agra', 'Lucknow', 'Visakhapatnam', 'Vadodara',
'Allahabad', 'Nashik', 'Rajkot', 'Ranchi', 'Kharghar',
'Kapurthala', 'Pimpri Chinchwad', 'Kolhapur', 'Bhilwara',
'Gurgaon', 'Udaipur', 'Jamshedpur', 'Jammu', 'Tiruchirappalli',
'Guntur', 'Bareilly', 'Noida', 'Mangalagiri', 'Barshi',
'Chandrapur', 'Chhindwara', 'Rajahmundry', 'Sant Kabir Nagar',
'North 24 Parganas', 'Jalandhar', 'Ajmer', 'Satara', 'Guna',
'Krishna', 'Anantnag', 'Rohtak', 'Muzaffarpur', 'Etawah', 'Nagaon',
'Hazaribagh', 'Sangli', 'Anand', 'Ballabhgarh'], dtype=object)
In [1044]:
main.columns[120]
Out[1044]:
'option'
In [1045]:
main["option"]
Out[1045]:
0 NO
1 YES
2 YES
3 YES
4 YES
...
160026 NO
160027 NO
160028 NO
160029 YES
160030 YES
Name: option, Length: 160031, dtype: object
In [1046]:
main["option"].unique()
Out[1046]:
array(['NO', 'YES'], dtype=object)
In [1047]:
plt.figure(figsize=(15,5))
sns.countplot(main["option"])
Out[1047]:
<AxesSubplot:xlabel='option', ylabel='count'>
In [1048]:
main.columns[122]
Out[1048]:
'City Mileage'
In [1049]:
main["City Mileage"]
Out[1049]:
0 20.0 kmpl
1 NaN
2 13.0 kmpl
3 19.0 kmpl
4 17.0 kmpl
...
160026 NaN
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: City Mileage, Length: 160031, dtype: object
In [1050]:
main["City Mileage"].unique()
Out[1050]:
array(['20.0 kmpl', nan, '13.0 kmpl', '19.0 kmpl', '17.0 kmpl',
'19.02 kmpl', '21.0 kmpl', '25.0 km/kg', '14.65 kmpl', '16.0 kmpl',
'14.0 kmpl', '23.0 kmpl', '16.5 kmpl', '32.0 km/kg', '26.0 kmpl',
'29.0 km/kg', '12.3 kmpl', '13.1 kmpl', '10.1 kmpl', '10.5 kmpl',
'9.5 kmpl', '17.5 kmpl', '16.49 kmpl', '11.6 kmpl', '14.9 kmpl',
'10.0 kmpl', '19.42 kmpl', '13.5 kmpl', '4.6 kmpl', '11.0 kmpl',
'8.2 kmpl', '10.1 km/kg', '9.3 kmpl', '21.2 kmpl', '10.2 kmpl',
'20.2 kmpl', '13.7 kmpl', '13.84 kmpl', '7.7 kmpl', '12.14 kmpl',
'12.0 kmpl', '14.68 kmpl', '22.4 kmpl', '9.62 kmpl', '13.48 kmpl',
'11.5 kmpl', '16.28 kmpl', '13.86 kmpl', '15.32 kmpl', '18.0 kmpl',
'24.0 km/kg', '12.6 kmpl', '15.0 kmpl', '12.57 kmpl', '13.78 kmpl',
'22.0 km/kg', '12.06 kmpl', '11.51 kmpl', '12.24 kmpl',
'3.22 kmpl', '8.0 kmpl', '15.64 kmpl', '12.08 kmpl', '9.0 kmpl',
'17.19 kmpl', '15.4 kmpl', '20.37 kmpl', '10.24 kmpl', '6.89 kmpl',
'9.6 kmpl', '6.5 kmpl', '7.6 kmpl', '11.0 km/kg', '11.4 kmpl',
'10.8 kmpl', '8.25 kmpl', '13.6 kmpl', '5.7 kmpl', '13.04 kmpl',
'14.14 kmpl', '12.99 kmpl', '13.41 kmpl', '14.42 kmpl',
'14.03 kmpl', '18.5 kmpl', '26.93 km/kg', '16.8 kmpl',
'14.17 kmpl', '26.0 km/kg', '16.48 kmpl', '23.0 km/kg',
'16.94 kmpl', '17.35 kmpl', '10.52 kmpl', '11.54 kmpl',
'11.96 kmpl', '13.47 kmpl', '12.12 kmpl', '13.64 kmpl'],
dtype=object)
In [1051]:
def preprocess_City_Mileage(text):
text=str(text)
if text=="nan":
return np.nan
varient="".join(re.findall("[a-zA-Z\/]",text))
if varient=="kmpl":
text=float(text.replace(varient,""))
elif varient=="km/kg":
text=float(text.replace(varient,""))*1.40
return text
In [1052]:
main["City Mileage"]=main["City Mileage"].apply(preprocess_City_Mileage)
In [1053]:
main.rename(columns={"City Mileage":"City Mileage(Km/L)"},inplace=True)
In [1054]:
main["City Mileage(Km/L)"]
Out[1054]:
0 20.0
1 NaN
2 13.0
3 19.0
4 17.0
...
160026 NaN
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: City Mileage(Km/L), Length: 160031, dtype: float64
In [1055]:
main.columns[123]
Out[1055]:
'Turning Radius (Metres)'
In [1056]:
main["Turning Radius (Metres)"]
Out[1056]:
0 4.8
1 4.5
2 5.5
3 4.8
4 4.5
...
160026 NaN
160027 NaN
160028 NaN
160029 NaN
160030 NaN
Name: Turning Radius (Metres), Length: 160031, dtype: object
In [407]:
def preprocess_turing_radius(text):
text=str(text)
if text=='nan':
return np.nan
text=text.title()
if "Mm" in text:
float_=float(text.replace("".join(re.findall('[a-zA-Z]',text)),""))
return "{} Metres".format(float_/1000)
if "Meters" in text:
text=text.replace("Meters","Metres")
if "Meter" in text:
text=text.replace("Meter","Metres")
if 'M' in text and "Metres" not in text:
text=text.replace("M"," Metres")
if "Metres" not in text:
text=text+" Metres"
if text[text.find('M')-1]!=" ":
text=text[:text.find('M')]+" "+text[text.find('M'):]
return text
In [408]:
main["Turning Radius (Metres)"]=main["Turning Radius (Metres)"].apply(preprocess_turing_radius)
In [1057]:
main.columns[124]
Out[1057]:
'Vanity Mirror'
In [1058]:
main["Vanity Mirror"]
Out[1058]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Vanity Mirror, Length: 160031, dtype: object
In [1059]:
main["Vanity Mirror"].unique()
Out[1059]:
array(['YES', nan, 'NO', '5', '4', '2 Zone', '2765', '3 Zone'],
dtype=object)
In [1060]:
plt.figure(figsize=(15,5))
sns.countplot(main["Vanity Mirror"])
Out[1060]:
<AxesSubplot:xlabel='Vanity Mirror', ylabel='count'>
In [1061]:
main["Vanity Mirror"]=main["Vanity Mirror"].apply(lambda x:preprocess_to_null_out(x,False))
In [1062]:
plt.figure(figsize=(15,5))
sns.countplot(main["Vanity Mirror"])
Out[1062]:
<AxesSubplot:xlabel='Vanity Mirror', ylabel='count'>
In [1063]:
main.columns[125]
Out[1063]:
'Navigation System'
In [1064]:
main["Navigation System"]
Out[1064]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Navigation System, Length: 160031, dtype: object
In [1065]:
main["Navigation System"].unique()
Out[1065]:
array(['YES', nan, 'NO', '4 Zone'], dtype=object)
In [1066]:
plt.figure(figsize=(15,5))
sns.countplot(main["Navigation System"])
Out[1066]:
<AxesSubplot:xlabel='Navigation System', ylabel='count'>
In [1067]:
main["Navigation System"]=main["Navigation System"].apply(lambda x:preprocess_to_null_out(x,False))
In [1068]:
plt.figure(figsize=(15,5))
sns.countplot(main["Navigation System"])
Out[1068]:
<AxesSubplot:xlabel='Navigation System', ylabel='count'>
In [1069]:
main.columns[126]
Out[1069]:
'Outside Temperature Display'
In [1070]:
main["Outside Temperature Display"]
Out[1070]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Outside Temperature Display, Length: 160031, dtype: object
In [1071]:
main["Outside Temperature Display"].unique()
Out[1071]:
array(['YES', nan, 'NO'], dtype=object)
In [1072]:
plt.figure(figsize=(15,5))
sns.countplot(main["Outside Temperature Display"])
Out[1072]:
<AxesSubplot:xlabel='Outside Temperature Display', ylabel='count'>
In [1073]:
main.columns[127]
Out[1073]:
'Manually Adjustable Ext. Rear View Mirror'
In [1074]:
main["Manually Adjustable Ext. Rear View Mirror"]
Out[1074]:
0 NO
1 YES
2 NaN
3 NO
4 YES
...
160026 NaN
160027 NaN
160028 NO
160029 NaN
160030 NaN
Name: Manually Adjustable Ext. Rear View Mirror, Length: 160031, dtype: object
In [1075]:
main["Manually Adjustable Ext. Rear View Mirror"].unique()
Out[1075]:
array(['NO', 'YES', nan], dtype=object)
In [1076]:
plt.figure(figsize=(15,5))
sns.countplot(main["Manually Adjustable Ext. Rear View Mirror"])
Out[1076]:
<AxesSubplot:xlabel='Manually Adjustable Ext. Rear View Mirror', ylabel='count'>
In [1077]:
main.columns[128]
Out[1077]:
'Power Antenna'
In [1078]:
main["Power Antenna"]
Out[1078]:
0 YES
1 NaN
2 NaN
3 YES
4 NO
...
160026 NaN
160027 NaN
160028 NO
160029 NaN
160030 NaN
Name: Power Antenna, Length: 160031, dtype: object
In [1079]:
main["Power Antenna"].unique()
Out[1079]:
array(['YES', nan, 'NO'], dtype=object)
In [1080]:
plt.figure(figsize=(15,5))
sns.countplot(main["Power Antenna"])
Out[1080]:
<AxesSubplot:xlabel='Power Antenna', ylabel='count'>
In [1081]:
main.columns[129]
Out[1081]:
'Brake Assist'
In [1082]:
main["Brake Assist"]
Out[1082]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Brake Assist, Length: 160031, dtype: object
In [1083]:
main["Brake Assist"].unique()
Out[1083]:
array(['YES', nan, 'NO'], dtype=object)
In [1084]:
plt.figure(figsize=(15,5))
sns.countplot(main["Brake Assist"])
Out[1084]:
<AxesSubplot:xlabel='Brake Assist', ylabel='count'>
In [1085]:
main.columns[130]
Out[1085]:
'Anti-Theft Alarm'
In [1086]:
main["Anti-Theft Alarm"]
Out[1086]:
0 YES
1 NaN
2 YES
3 YES
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Anti-Theft Alarm, Length: 160031, dtype: object
In [1087]:
main["Anti-Theft Alarm"].unique()
Out[1087]:
array(['YES', nan, 'NO'], dtype=object)
In [1088]:
plt.figure(figsize=(15,5))
sns.countplot(main["Anti-Theft Alarm"])
Out[1088]:
<AxesSubplot:xlabel='Anti-Theft Alarm', ylabel='count'>
In [1089]:
main.columns[131]
Out[1089]:
'Speed Sensing Auto Door Lock'
In [1090]:
main["Speed Sensing Auto Door Lock"]
Out[1090]:
0 YES
1 NaN
2 NaN
3 YES
4 NO
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Speed Sensing Auto Door Lock, Length: 160031, dtype: object
In [1091]:
main["Speed Sensing Auto Door Lock"].unique()
Out[1091]:
array(['YES', nan, 'NO'], dtype=object)
In [1092]:
plt.figure(figsize=(15,5))
sns.countplot(main["Speed Sensing Auto Door Lock"])
Out[1092]:
<AxesSubplot:xlabel='Speed Sensing Auto Door Lock', ylabel='count'>
In [1093]:
main.columns[132]
Out[1093]:
'Chrome Garnish'
In [1094]:
main["Chrome Garnish"]
Out[1094]:
0 YES
1 NaN
2 NaN
3 YES
4 NaN
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Chrome Garnish, Length: 160031, dtype: object
In [1095]:
main["Chrome Garnish"].unique()
Out[1095]:
array(['YES', nan, 'NO'], dtype=object)
In [1096]:
plt.figure(figsize=(15,5))
sns.countplot(main["Chrome Garnish"])
Out[1096]:
<AxesSubplot:xlabel='Chrome Garnish', ylabel='count'>
In [1097]:
main.columns[133]
Out[1097]:
'Side Impact Beams'
In [1098]:
main["Side Impact Beams"]
Out[1098]:
0 NaN
1 YES
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Side Impact Beams, Length: 160031, dtype: object
In [1099]:
main["Side Impact Beams"].unique()
Out[1099]:
array([nan, 'YES', 'NO'], dtype=object)
In [1100]:
plt.figure(figsize=(15,5))
sns.countplot(main["Side Impact Beams"])
Out[1100]:
<AxesSubplot:xlabel='Side Impact Beams', ylabel='count'>
In [1101]:
main.columns[134]
Out[1101]:
'Drive Type'
In [1102]:
main["Drive Type"]
Out[1102]:
0 NaN
1 NaN
2 FWD
3 NaN
4 FWD
...
160026 AWD
160027 NaN
160028 FWD
160029 NaN
160030 NaN
Name: Drive Type, Length: 160031, dtype: object
In [1103]:
main["Drive Type"].unique()
Out[1103]:
array([nan, 'FWD', '2WD', 'Front Wheel Drive', 'RWD', '2wd', '4WD', 'AWD',
'Rear wheels', '4x2', '4x4', '4X4', 'RWD(with MTT)',
'All Wheel Drive'], dtype=object)
In [1104]:
def preprocess_Drive_Type(text):
text=str(text)
if text=="nan":
return np.nan
text=text.title()
return text
In [1105]:
main["Drive Type"]=main["Drive Type"].apply(preprocess_Drive_Type)
In [1106]:
main["Drive Type"].unique()
Out[1106]:
array([nan, 'Fwd', '2Wd', 'Front Wheel Drive', 'Rwd', '4Wd', 'Awd',
'Rear Wheels', '4X2', '4X4', 'Rwd(With Mtt)', 'All Wheel Drive'],
dtype=object)
In [1107]:
plt.figure(figsize=(15,5))
sns.countplot(main["Drive Type"])
Out[1107]:
<AxesSubplot:xlabel='Drive Type', ylabel='count'>
In [1108]:
main.columns[135]
Out[1108]:
'Rear Reading Lamp'
In [1109]:
main["Rear Reading Lamp"]
Out[1109]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Rear Reading Lamp, Length: 160031, dtype: object
In [1110]:
main["Rear Reading Lamp"].unique()
Out[1110]:
array([nan, 'NO', 'YES', '5', '4', '2765', '3 Zone'], dtype=object)
In [1111]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Reading Lamp"])
Out[1111]:
<AxesSubplot:xlabel='Rear Reading Lamp', ylabel='count'>
In [1112]:
main["Rear Reading Lamp"]=main["Rear Reading Lamp"].apply(lambda x:preprocess_to_null_out(x,False))
In [1113]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Reading Lamp"])
Out[1113]:
<AxesSubplot:xlabel='Rear Reading Lamp', ylabel='count'>
In [1114]:
main.columns[136]
Out[1114]:
'Cup Holders-Front'
In [1115]:
main["Cup Holders-Front"]
Out[1115]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Cup Holders-Front, Length: 160031, dtype: object
In [1116]:
main["Cup Holders-Front"].unique()
Out[1116]:
array([nan, 'YES', 'NO'], dtype=object)
In [1117]:
plt.figure(figsize=(15,5))
sns.countplot(main["Cup Holders-Front"])
Out[1117]:
<AxesSubplot:xlabel='Cup Holders-Front', ylabel='count'>
In [1118]:
main.columns[137]
Out[1118]:
'Leather Seats'
In [1119]:
main["Leather Seats"]
Out[1119]:
0 NaN
1 NaN
2 NO
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Leather Seats, Length: 160031, dtype: object
In [1120]:
main["Leather Seats"].unique()
Out[1120]:
array([nan, 'NO', 'YES', '6', '2'], dtype=object)
In [1121]:
plt.figure(figsize=(15,5))
sns.countplot(main["Leather Seats"])
Out[1121]:
<AxesSubplot:xlabel='Leather Seats', ylabel='count'>
In [1122]:
main["Leather Seats"]=main["Leather Seats"].apply(lambda x:preprocess_to_null_out(x,False))
In [1123]:
plt.figure(figsize=(15,5))
sns.countplot(main["Leather Seats"])
Out[1123]:
<AxesSubplot:xlabel='Leather Seats', ylabel='count'>
In [1124]:
main.columns[138]
Out[1124]:
'Driving Experience Control Eco'
In [1125]:
main["Driving Experience Control Eco"]
Out[1125]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Driving Experience Control Eco, Length: 160031, dtype: object
In [1126]:
main["Driving Experience Control Eco"].unique()
Out[1126]:
array([nan, 'YES', 'NO'], dtype=object)
In [1127]:
plt.figure(figsize=(15,5))
sns.countplot(main["Driving Experience Control Eco"])
Out[1127]:
<AxesSubplot:xlabel='Driving Experience Control Eco', ylabel='count'>
In [1128]:
main.columns[139]
Out[1128]:
'Ventilated Seats'
In [1129]:
main["Ventilated Seats"]
Out[1129]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Ventilated Seats, Length: 160031, dtype: object
In [1130]:
main["Ventilated Seats"].unique()
Out[1130]:
array([nan, 'YES', 'NO'], dtype=object)
In [1131]:
plt.figure(figsize=(15,5))
sns.countplot(main["Ventilated Seats"])
Out[1131]:
<AxesSubplot:xlabel='Ventilated Seats', ylabel='count'>
In [1132]:
main.columns[140]
Out[1132]:
'Tyre Pressure Monitor'
In [1133]:
main["Tyre Pressure Monitor"]
Out[1133]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Tyre Pressure Monitor, Length: 160031, dtype: object
In [1134]:
main["Tyre Pressure Monitor"].unique()
Out[1134]:
array([nan, 'YES', 'NO', 'Tubeless,Radial'], dtype=object)
In [1135]:
plt.figure(figsize=(15,5))
sns.countplot(main["Tyre Pressure Monitor"])
Out[1135]:
<AxesSubplot:xlabel='Tyre Pressure Monitor', ylabel='count'>
In [1136]:
main["Tyre Pressure Monitor"]=main["Tyre Pressure Monitor"].apply(lambda x:preprocess_to_null_out(x,False))
In [1137]:
plt.figure(figsize=(15,5))
sns.countplot(main["Tyre Pressure Monitor"])
Out[1137]:
<AxesSubplot:xlabel='Tyre Pressure Monitor', ylabel='count'>
In [1138]:
main.columns[141]
Out[1138]:
'Rain Sensing Wiper'
In [1139]:
main["Rain Sensing Wiper"]
Out[1139]:
0 NaN
1 NaN
2 NO
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Rain Sensing Wiper, Length: 160031, dtype: object
In [1140]:
main["Rain Sensing Wiper"].unique()
Out[1140]:
array([nan, 'NO', 'YES'], dtype=object)
In [1141]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rain Sensing Wiper"])
Out[1141]:
<AxesSubplot:xlabel='Rain Sensing Wiper', ylabel='count'>
In [1142]:
main.columns[142]
Out[1142]:
'Turbo Charger'
In [1143]:
main["Turbo Charger"]
Out[1143]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 NaN
160027 NaN
160028 Yes
160029 NaN
160030 NaN
Name: Turbo Charger, Length: 160031, dtype: object
In [1144]:
main["Turbo Charger"].unique()
Out[1144]:
array([nan, 'Yes', 'No', 'Twin', 'twin', 'Turbo', 'TWIN', 'YES'],
dtype=object)
In [1145]:
plt.figure(figsize=(15,5))
sns.countplot(main["Turbo Charger"])
Out[1145]:
<AxesSubplot:xlabel='Turbo Charger', ylabel='count'>
In [1146]:
def preprocess_Turbo_Charger(text):
text=str(text)
if text=="nan":
return np.nan
text=text.title()
if "Twin" in text or "Turbo" in text:
return "YES"
text=text.upper()
return text
In [1147]:
main["Turbo Charger"]=main["Turbo Charger"].apply(preprocess_Turbo_Charger)
In [1148]:
main["Turbo Charger"].unique()
Out[1148]:
array([nan, 'YES', 'NO'], dtype=object)
In [1149]:
plt.figure(figsize=(15,5))
sns.countplot(main["Turbo Charger"])
Out[1149]:
<AxesSubplot:xlabel='Turbo Charger', ylabel='count'>
In [1150]:
main.columns[143]
Out[1150]:
'Air Quality Control'
In [1151]:
main["Air Quality Control"]
Out[1151]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Air Quality Control, Length: 160031, dtype: object
In [1152]:
main["Air Quality Control"].unique()
Out[1152]:
array([nan, 'NO', 'YES', '5', '4', '2923', '4 Zone'], dtype=object)
In [1153]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Quality Control"])
Out[1153]:
<AxesSubplot:xlabel='Air Quality Control', ylabel='count'>
In [1154]:
main["Air Quality Control"]=main["Air Quality Control"].apply(lambda x:preprocess_to_null_out(x,False))
In [1155]:
plt.figure(figsize=(15,5))
sns.countplot(main["Air Quality Control"])
Out[1155]:
<AxesSubplot:xlabel='Air Quality Control', ylabel='count'>
In [1156]:
main.columns[144]
Out[1156]:
'Traction Control'
In [1157]:
main["Traction Control"]
Out[1157]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 NaN
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Traction Control, Length: 160031, dtype: object
In [1158]:
main["Traction Control"].unique()
Out[1158]:
array([nan, 'YES', 'NO'], dtype=object)
In [1159]:
plt.figure(figsize=(15,5))
sns.countplot(main["Traction Control"])
Out[1159]:
<AxesSubplot:xlabel='Traction Control', ylabel='count'>
In [1160]:
main.columns[145]
Out[1160]:
'Vehicle Stability Control System'
In [1161]:
main["Vehicle Stability Control System"]
Out[1161]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
160026 YES
160027 NaN
160028 YES
160029 NaN
160030 NaN
Name: Vehicle Stability Control System, Length: 160031, dtype: object
In [1162]:
main["Vehicle Stability Control System"].unique()
Out[1162]:
array([nan, 'YES', 'NO'], dtype=object)
In [1163]:
plt.figure(figsize=(15,5))
sns.countplot(main["Vehicle Stability Control System"])
Out[1163]:
<AxesSubplot:xlabel='Vehicle Stability Control System', ylabel='count'>
In [4]:
main["Low Fuel Warning Light"]
Out[4]:
0 YES
1 NaN
2 YES
3 YES
4 NO
...
105797 YES
105798 4 Zone
105799 YES
105800 YES
105801 YES
Name: Low Fuel Warning Light, Length: 105802, dtype: object
In [5]:
main["Low Fuel Warning Light"].unique()
Out[5]:
array(['YES', nan, 'NO', '5', '4', '3 Zone', '2765', '4 Zone'],
dtype=object)
In [6]:
plt.figure(figsize=(15,5))
sns.countplot(main["Low Fuel Warning Light"])
Out[6]:
<AxesSubplot:xlabel='Low Fuel Warning Light', ylabel='count'>
In [7]:
main["Low Fuel Warning Light"]=main["Low Fuel Warning Light"].apply(lambda x:preprocess_to_null_out(x,False))
In [10]:
main["Accessory Power Outlet"]
Out[10]:
0 YES
1 YES
2 NaN
3 YES
4 NO
...
105797 YES
105798 4 Zone
105799 YES
105800 YES
105801 YES
Name: Accessory Power Outlet, Length: 105802, dtype: object
In [11]:
main["Accessory Power Outlet"].unique()
Out[11]:
array(['YES', nan, 'NO', '5', '4', '2 Zone', '3 Zone', '2765', '4 Zone'],
dtype=object)
In [12]:
plt.figure(figsize=(15,5))
sns.countplot(main["Accessory Power Outlet"])
Out[12]:
<AxesSubplot:xlabel='Accessory Power Outlet', ylabel='count'>
In [13]:
main["Accessory Power Outlet"]=main["Accessory Power Outlet"].apply(lambda x:preprocess_to_null_out(x,False))
In [14]:
main["Trunk Light"]
Out[14]:
0 YES
1 NaN
2 NaN
3 YES
4 NaN
...
105797 YES
105798 YES
105799 YES
105800 YES
105801 YES
Name: Trunk Light, Length: 105802, dtype: object
In [15]:
main["Trunk Light"].unique()
Out[15]:
array(['YES', nan, 'NO', '5', '4', '2 Zone', '3 Zone', '2765'],
dtype=object)
In [16]:
plt.figure(figsize=(15,5))
sns.countplot(main["Trunk Light"])
Out[16]:
<AxesSubplot:xlabel='Trunk Light', ylabel='count'>
In [17]:
main["Trunk Light"]=main["Trunk Light"].apply(lambda x:preprocess_to_null_out(x,False))
In [18]:
main["Rear Seat Headrest"]
Out[18]:
0 YES
1 YES
2 YES
3 YES
4 YES
...
105797 YES
105798 YES
105799 YES
105800 YES
105801 YES
Name: Rear Seat Headrest, Length: 105802, dtype: object
In [19]:
main["Rear Seat Headrest"].unique()
Out[19]:
array(['YES', nan, 'NO', '5', '4', '2765', '3 Zone'], dtype=object)
In [20]:
plt.figure(figsize=(15,5))
sns.countplot(main["Rear Seat Headrest"])
Out[20]:
<AxesSubplot:xlabel='Rear Seat Headrest', ylabel='count'>
In [21]:
main["Rear Seat Headrest"]=main["Rear Seat Headrest"].apply(lambda x:preprocess_to_null_out(x,False))
In [1167]:
#Drop duplicates rows again beacuse of place column
main=main.drop_duplicates()
In [1168]:
#Displacement (cc) and Engine Displacement (cc) column are same
main.drop(["Displacement (cc)"],axis=1,inplace=True)
In [1169]:
# No. of cylinder and Valves Per Cylinder column are same
main.drop(["Valves Per Cylinder"],axis=1,inplace=True)
In [1170]:
# ARAI Mileage and Petrol Mileage (ARAI) column are same
main.drop(["Petrol Mileage (ARAI)"],axis=1,inplace=True)
In [3]:
main.drop(["Petrol Fuel Tank Capacity (Litres)"],axis=1,inplace=True)
main.drop(["Power Windows-Rear"],axis=1,inplace=True)
main.drop(["Power Windows-Front"],axis=1,inplace=True)
In [ ]:
main.to_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//Preprocessed_datas.csv")
Missing Imputation¶
All the Missing values in the columns are based on the MCAR(Missing completely at random),so i am using the MCAR tecniques like
1.Dropping Features
2.Mean,Mode,Medium
3.End of tail distribution
4.Minimum imputation
5.Maximum imputation
Here we are dealing with MCAR so imputation is done independently on the each Brand then only we can make correct guess
In [40]:
main.isnull().sum()
Out[40]:
Model 0
Brand 0
Varient 0
ARAI Mileage(Km/L) 24974
...
BHP 4455
RPM 4455
NM 4555
NM_RPM 4555
Length: 147, dtype: int64
In [44]:
ms.bar(main)
Out[44]:
<AxesSubplot:>
In [45]:
pd.set_option("display.max_rows",8)
In [46]:
calculate_missing_percentage(main)
Out[46]:
| Fearure | Percentage | |
|---|---|---|
| 0 | Model | 0.000000 |
| 1 | Brand | 0.000000 |
| 2 | Varient | 0.000000 |
| 3 | ARAI Mileage(Km/L) | 0.236045 |
| ... | ... | ... |
| 143 | BHP | 0.042107 |
| 144 | RPM | 0.042107 |
| 145 | NM | 0.043052 |
| 146 | NM_RPM | 0.043052 |
147 rows × 2 columns
In [23]:
main=main.sort_values(by="Brand")
In [24]:
Filled_DataFrame=pd.DataFrame()
In [25]:
def fill_values_for_numericals(models,brands,column,enabler):
global Filled_DataFrame
entire=main["{}".format(column)]
entire=entire.loc[entire.isnull()==False]
com=main.loc[(main["Model"]=="{}".format(models))]["{}".format(column)]
com=com.loc[com.isnull()==False]
com1=main.loc[(main["Model"]=="{}".format(models))&(main["Brand"]=="{}".format(brands))][["Brand","{}".format(column)]]
computed_column=com1["{}".format(column)]
if computed_column.isnull().any()==True and computed_column.isnull().all()==True:
if com.isnull().all()==True:
if enabler==True:
temp_=entire.apply(lambda x:str(x))
com1["{}".format(column)].fillna(float(temp_.mode()),inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
skew_value=entire.skew()
if skew_value<-0.5 or skew_value>0.5:
com1["{}".format(column)].fillna(entire.median(),inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
com1["{}".format(column)].fillna(entire.mean(),inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
if enabler==True:
temp_=com.apply(lambda x:str(x))
com1["{}".format(column)].fillna(float(temp_.mode()),inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
skew_value=com.skew()
if skew_value<-0.5 or skew_value>0.5:
com1["{}".format(column)].fillna(com.median(),inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
com1["{}".format(column)].fillna(com.mean(),inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
com1_without_null=computed_column.loc[computed_column.isnull()==False]
if enabler==True:
temp_=com1_without_null.apply(lambda x:str(x))
com1["{}".format(column)].fillna(float(temp_.mode()[0]),inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
skew_value=com1_without_null.skew()
if skew_value<-0.5 or skew_value>0.5:
com1["{}".format(column)].fillna(com1_without_null.median(),inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
com1["{}".format(column)].fillna(com1_without_null.mean(),inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
In [26]:
def prepare_data_for_create_dataframe(data):
models=[]
brands=[]
for i in data:
models.append(i[0])
brands.append(i[1])
return pd.DataFrame({
"Model":models,
"Brand":brands
})
In [27]:
non_repitation_data=list(set(zip(main[["Model","Brand"]]["Model"],main[["Model","Brand"]]["Brand"])))
In [28]:
prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand")
Out[28]:
| Model | Brand | |
|---|---|---|
| 59 | BMW | 2 Series |
| 94 | Ferrari | 296 GTB |
| 211 | BMW | 3 Series |
| 58 | BMW | 5 Series |
| 228 | BMW | 6 Series |
| ... | ... | ... |
| 98 | Audi | e-tron GT |
| 79 | Hyundai | i20 |
| 56 | Hyundai | i20 N Line |
| 113 | BMW | i4 |
| 217 | BMW | iX |
248 rows × 2 columns
In [29]:
len(non_repitation_data)
Out[29]:
248
In [30]:
len(main["Brand"].unique())
Out[30]:
248
In [31]:
main.shape
Out[31]:
(105802, 148)
In [32]:
Filled_DataFrame=pd.DataFrame()
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"ARAI Mileage(Km/L)",False),axis=1)
In [33]:
main[["Model","Brand","ARAI Mileage(Km/L)"]].iloc[100]
Out[33]:
Model BMW Brand 2 Series ARAI Mileage(Km/L) 14.82 Name: 28373, dtype: object
In [34]:
Filled_DataFrame["ARAI Mileage(Km/L)"].isnull().sum()
Out[34]:
0
In [35]:
main["ARAI Mileage(Km/L)"]=Filled_DataFrame["ARAI Mileage(Km/L)"]
In [36]:
Filled_DataFrame[["Brand","ARAI Mileage(Km/L)"]].iloc[100]
Out[36]:
Brand 2 Series ARAI Mileage(Km/L) 14.82 Name: 28373, dtype: object
In [37]:
main["ARAI Mileage(Km/L)"].isnull().sum()
Out[37]:
0
In [38]:
# Engine Displacement (cc)
Filled_DataFrame=pd.DataFrame()
main["Engine Displacement (cc)"].isnull().sum()
Out[38]:
6703
In [39]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Engine Displacement (cc)",False),axis=1)
In [40]:
main[["Model","Brand","Engine Displacement (cc)"]].iloc[101921]
Out[40]:
Model Mahindra Brand XUV700 Engine Displacement (cc) 2198.0 Name: 61152, dtype: object
In [41]:
Filled_DataFrame["Engine Displacement (cc)"].isnull().sum()
Out[41]:
0
In [42]:
main["Engine Displacement (cc)"]=Filled_DataFrame["Engine Displacement (cc)"]
In [43]:
Filled_DataFrame[["Brand","Engine Displacement (cc)"]].iloc[101921]
Out[43]:
Brand XUV700 Engine Displacement (cc) 2198.0 Name: 61152, dtype: object
In [44]:
main["Engine Displacement (cc)"].isnull().sum()
Out[44]:
0
In [45]:
main.columns[12]
Out[45]:
'Fuel Tank Capacity'
In [46]:
# Boot Space (Litres)
Filled_DataFrame=pd.DataFrame()
main["Fuel Tank Capacity"]
Out[46]:
26993 51.0
27910 51.0
27903 51.0
28435 50.0
28120 50.0
...
27057 NaN
27583 NaN
27572 NaN
26558 NaN
26874 NaN
Name: Fuel Tank Capacity, Length: 105802, dtype: float64
In [47]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Fuel Tank Capacity",False),axis=1)
In [48]:
main[["Model","Brand","Fuel Tank Capacity"]].iloc[79190]
Out[48]:
Model Mahindra Brand Scorpio-N Fuel Tank Capacity NaN Name: 60831, dtype: object
In [49]:
Filled_DataFrame["Fuel Tank Capacity"].isnull().sum()
Out[49]:
0
In [50]:
main["Fuel Tank Capacity"]=Filled_DataFrame["Fuel Tank Capacity"]
In [51]:
Filled_DataFrame[["Brand","Fuel Tank Capacity"]].iloc[79190]
Out[51]:
Brand Scorpio-N Fuel Tank Capacity 52.378078 Name: 60831, dtype: object
In [52]:
main["Fuel Tank Capacity"].isnull().sum()
Out[52]:
0
In [59]:
main["Fuel Tank Capacity"].isnull().sum()
Out[59]:
0
In [60]:
# Length (mm)
Filled_DataFrame=pd.DataFrame()
main["Length (mm)"]
Out[60]:
26993 4526.0
27910 4526.0
27903 4526.0
28435 4526.0
28120 4526.0
...
27057 4953.0
27583 4953.0
27572 4953.0
26558 4953.0
26874 4953.0
Name: Length (mm), Length: 105802, dtype: float64
In [61]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Length (mm)",False),axis=1)
In [62]:
main[["Model","Brand","Length (mm)"]].iloc[79190]
Out[62]:
Model Mahindra Brand Scorpio-N Length (mm) 4662.0 Name: 60831, dtype: object
In [63]:
Filled_DataFrame["Length (mm)"].isnull().sum()
Out[63]:
0
In [64]:
main["Length (mm)"]=Filled_DataFrame["Length (mm)"]
In [65]:
Filled_DataFrame[["Brand","Length (mm)"]].iloc[79190]
Out[65]:
Brand Scorpio-N Length (mm) 4662.0 Name: 60831, dtype: object
In [66]:
main["Length (mm)"].isnull().sum()
Out[66]:
0
In [67]:
# Width (mm)
Filled_DataFrame=pd.DataFrame()
main["Width (mm)"]
Out[67]:
26993 2081.0
27910 2081.0
27903 2081.0
28435 2081.0
28120 2081.0
...
27057 2230.0
27583 2230.0
27572 2230.0
26558 2230.0
26874 2230.0
Name: Width (mm), Length: 105802, dtype: float64
In [68]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Width (mm)",False),axis=1)
In [69]:
main[["Model","Brand","Width (mm)"]].iloc[79190]
Out[69]:
Model Mahindra Brand Scorpio-N Width (mm) 1917.0 Name: 60831, dtype: object
In [70]:
Filled_DataFrame["Width (mm)"].isnull().sum()
Out[70]:
0
In [71]:
main["Width (mm)"]=Filled_DataFrame["Width (mm)"]
In [72]:
Filled_DataFrame[["Brand","Width (mm)"]].iloc[79190]
Out[72]:
Brand Scorpio-N Width (mm) 1917.0 Name: 60831, dtype: object
In [73]:
# Height (mm)
Filled_DataFrame=pd.DataFrame()
main["Height (mm)"]
Out[73]:
26993 1420.0
27910 1420.0
27903 1420.0
28435 1420.0
28120 1420.0
...
27057 1695.0
27583 1695.0
27572 1695.0
26558 1695.0
26874 1695.0
Name: Height (mm), Length: 105802, dtype: float64
In [74]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Height (mm)",False),axis=1)
In [76]:
Filled_DataFrame["Height (mm)"].isnull().sum()
Out[76]:
0
In [77]:
main["Height (mm)"]=Filled_DataFrame["Height (mm)"]
In [79]:
# City Mileage(Km/L)
Filled_DataFrame=pd.DataFrame()
main["City Mileage(Km/L)"]
Out[79]:
26993 NaN
27910 NaN
27903 NaN
28435 NaN
28120 NaN
..
27057 NaN
27583 NaN
27572 NaN
26558 NaN
26874 NaN
Name: City Mileage(Km/L), Length: 105802, dtype: float64
In [80]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"City Mileage(Km/L)",False),axis=1)
In [81]:
main[["Model","Brand","City Mileage(Km/L)"]].iloc[79190]
Out[81]:
Model Mahindra Brand Scorpio-N City Mileage(Km/L) NaN Name: 60831, dtype: object
In [82]:
Filled_DataFrame["City Mileage(Km/L)"].isnull().sum()
Out[82]:
0
In [83]:
main["City Mileage(Km/L)"]=Filled_DataFrame["City Mileage(Km/L)"]
In [84]:
Filled_DataFrame[["Brand","City Mileage(Km/L)"]].iloc[79190]
Out[84]:
Brand Scorpio-N City Mileage(Km/L) 13.827467 Name: 60831, dtype: object
In [85]:
Filled_DataFrame=pd.DataFrame()
main["Boot Space (Litres)"]
Out[85]:
26993 NaN
27910 NaN
27903 NaN
28435 NaN
28120 NaN
..
27057 NaN
27583 NaN
27572 NaN
26558 NaN
26874 NaN
Name: Boot Space (Litres), Length: 105802, dtype: float64
In [86]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Boot Space (Litres)",False),axis=1)
In [87]:
main[["Model","Brand","Boot Space (Litres)"]].iloc[34503]
Out[87]:
Model Ford Brand Fiesta Boot Space (Litres) 430.0 Name: 21257, dtype: object
In [88]:
Filled_DataFrame["Boot Space (Litres)"].isnull().sum()
Out[88]:
0
In [89]:
main["Boot Space (Litres)"]=Filled_DataFrame["Boot Space (Litres)"]
In [90]:
Filled_DataFrame[["Brand","Boot Space (Litres)"]].iloc[34503]
Out[90]:
Brand Fiesta Boot Space (Litres) 430.0 Name: 21257, dtype: object
In [91]:
# BHP
Filled_DataFrame=pd.DataFrame()
main["BHP"]
Out[91]:
26993 187.74
27910 187.74
27903 187.74
28435 189.08
28120 189.08
...
27057 321.84
27583 321.84
27572 321.84
26558 321.84
26874 321.84
Name: BHP, Length: 105802, dtype: float64
In [92]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"BHP",False),axis=1)
In [93]:
main[["Model","Brand","BHP"]].iloc[79190]
Out[93]:
Model Mahindra Brand Scorpio-N BHP NaN Name: 60831, dtype: object
In [94]:
Filled_DataFrame["BHP"].isnull().sum()
Out[94]:
0
In [95]:
main["BHP"]=Filled_DataFrame["BHP"]
In [96]:
Filled_DataFrame[["Brand","BHP"]].iloc[79190]
Out[96]:
Brand Scorpio-N BHP 133.238545 Name: 60831, dtype: object
In [97]:
# RPM
Filled_DataFrame=pd.DataFrame()
main["RPM"]
Out[97]:
26993 4000.0
27910 4000.0
27903 4000.0
28435 5000.0
28120 5000.0
...
27057 0.0
27583 0.0
27572 0.0
26558 0.0
26874 0.0
Name: RPM, Length: 105802, dtype: float64
In [98]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"RPM",False),axis=1)
In [99]:
main[["Model","Brand","RPM"]].iloc[79190]
Out[99]:
Model Mahindra Brand Scorpio-N RPM NaN Name: 60831, dtype: object
In [100]:
Filled_DataFrame["RPM"].isnull().sum()
Out[100]:
0
In [101]:
main["RPM"]=Filled_DataFrame["RPM"]
In [102]:
Filled_DataFrame[["Brand","RPM"]].iloc[79190]
Out[102]:
Brand Scorpio-N RPM 3750.0 Name: 60831, dtype: object
In [103]:
# NM
Filled_DataFrame=pd.DataFrame()
main["NM"]
Out[103]:
26993 400.0
27910 400.0
27903 400.0
28435 280.0
28120 280.0
...
27057 630.0
27583 630.0
27572 630.0
26558 630.0
26874 630.0
Name: NM, Length: 105802, dtype: float64
In [104]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"NM",False),axis=1)
In [105]:
main[["Model","Brand","NM"]].iloc[79190]
Out[105]:
Model Mahindra Brand Scorpio-N NM NaN Name: 60831, dtype: object
In [106]:
Filled_DataFrame["NM"].isnull().sum()
Out[106]:
0
In [107]:
main["NM"]=Filled_DataFrame["NM"]
In [108]:
Filled_DataFrame[["Brand","NM"]].iloc[79190]
Out[108]:
Brand Scorpio-N NM 299.823959 Name: 60831, dtype: object
In [109]:
# NM_RPM
Filled_DataFrame=pd.DataFrame()
main["NM_RPM"]
Out[109]:
26993 2125.0
27910 2125.0
27903 2125.0
28435 2975.0
28120 2975.0
...
27057 0.0
27583 0.0
27572 0.0
26558 0.0
26874 0.0
Name: NM_RPM, Length: 105802, dtype: float64
In [110]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"NM_RPM",False),axis=1)
In [111]:
main[["Model","Brand","NM_RPM"]].iloc[79190]
Out[111]:
Model Mahindra Brand Scorpio-N NM_RPM NaN Name: 60831, dtype: object
In [112]:
Filled_DataFrame["NM_RPM"].isnull().sum()
Out[112]:
0
In [113]:
main["NM_RPM"]=Filled_DataFrame["NM_RPM"]
In [114]:
Filled_DataFrame[["Brand","NM_RPM"]].iloc[79190]
Out[114]:
Brand Scorpio-N NM_RPM 2200.0 Name: 60831, dtype: object
In [115]:
# Seating Capacity
Filled_DataFrame=pd.DataFrame()
main["Seating Capacity"]
Out[115]:
26993 5.0
27910 5.0
27903 5.0
28435 5.0
28120 5.0
...
27057 5.0
27583 5.0
27572 5.0
26558 5.0
26874 5.0
Name: Seating Capacity, Length: 105802, dtype: float64
In [116]:
main["Seating Capacity"].unique()
Out[116]:
array([ 5., 2., 4., nan, 6., 7., 0., 8.])
In [117]:
def remove_zero_into_null(text):
if text==0.0:
return np.nan
return text
In [118]:
main["Seating Capacity"]=main["Seating Capacity"].apply(remove_zero_into_null)
In [119]:
main["Seating Capacity"].unique()
Out[119]:
array([ 5., 2., 4., nan, 6., 7., 8.])
In [120]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"Seating Capacity",True),axis=1)
In [121]:
main[["Model","Brand","Seating Capacity"]].iloc[34503]
Out[121]:
Model Ford Brand Fiesta Seating Capacity 5.0 Name: 21257, dtype: object
In [122]:
Filled_DataFrame["Seating Capacity"].isnull().sum()
Out[122]:
0
In [123]:
main["Seating Capacity"]=Filled_DataFrame["Seating Capacity"]
In [124]:
Filled_DataFrame[["Brand","Seating Capacity"]].iloc[34503]
Out[124]:
Brand Fiesta Seating Capacity 5.0 Name: 21257, dtype: object
In [125]:
main["Seating Capacity"].unique()
Out[125]:
array([5., 2., 4., 6., 7., 8.])
In [126]:
Filled_DataFrame=pd.DataFrame()
main["No. of cylinder"]
Out[126]:
26993 4.0
27910 4.0
27903 4.0
28435 4.0
28120 4.0
...
27057 NaN
27583 NaN
27572 NaN
26558 NaN
26874 NaN
Name: No. of cylinder, Length: 105802, dtype: float64
In [127]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_values_for_numericals(x["Model"],x["Brand"],"No. of cylinder",True),axis=1)
In [128]:
main[["Model","Brand","No. of cylinder"]].iloc[34503]
Out[128]:
Model Ford Brand Fiesta No. of cylinder 4.0 Name: 21257, dtype: object
In [129]:
Filled_DataFrame["No. of cylinder"].isnull().sum()
Out[129]:
0
In [130]:
main["No. of cylinder"]=Filled_DataFrame["No. of cylinder"]
In [131]:
Filled_DataFrame[["Brand","No. of cylinder"]].iloc[345]
Out[131]:
Brand 3 Series No. of cylinder 4.0 Name: 27951, dtype: object
In [132]:
def fill_catagorical_values(models,brands,column):
global Filled_DataFrame
entire=main["{}".format(column)]
entire=entire.loc[entire.isnull()==False]
com=main.loc[(main["Model"]=="{}".format(models))]["{}".format(column)]
com=com.loc[com.isnull()==False]
com1=main.loc[(main["Model"]=="{}".format(models))&(main["Brand"]=="{}".format(brands))][["Brand","{}".format(column)]]
computed_column=com1["{}".format(column)]
if computed_column.isnull().any()==True and computed_column.isnull().all()==True:
if com.isnull().all()==True:
com1["{}".format(column)].fillna(entire.mode()[0],inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
com1["{}".format(column)].fillna(com.mode()[0],inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
else:
com1_without_null=computed_column.loc[computed_column.isnull()==False]
com1["{}".format(column)].fillna(com1_without_null.mode()[0],inplace=True)
Filled_DataFrame=pd.concat([Filled_DataFrame,com1],axis=0)
In [133]:
Filled_DataFrame=pd.DataFrame()
main["Body Type"]
Out[133]:
26993 Sedan
27910 Sedan
27903 Sedan
28435 Sedan
28120 Sedan
...
27057 SUV
27583 SUV
27572 SUV
26558 SUV
26874 SUV
Name: Body Type, Length: 105802, dtype: object
In [134]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Body Type"),axis=1)
In [135]:
main["Body Type"]=Filled_DataFrame["Body Type"]
In [136]:
# Multi-function Steering Wheel
Filled_DataFrame=pd.DataFrame()
main["Multi-function Steering Wheel"]
Out[136]:
26993 YES
27910 YES
27903 YES
28435 YES
28120 YES
...
27057 YES
27583 YES
27572 YES
26558 YES
26874 YES
Name: Multi-function Steering Wheel, Length: 105802, dtype: object
In [137]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Multi-function Steering Wheel"),axis=1)
In [138]:
main["Multi-function Steering Wheel"]=Filled_DataFrame["Multi-function Steering Wheel"]
In [139]:
# Touch Screen
Filled_DataFrame=pd.DataFrame()
main["Touch Screen"]
Out[139]:
26993 YES
27910 YES
27903 YES
28435 YES
28120 YES
...
27057 YES
27583 YES
27572 YES
26558 YES
26874 YES
Name: Touch Screen, Length: 105802, dtype: object
In [140]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Touch Screen"),axis=1)
In [141]:
main["Touch Screen"]=Filled_DataFrame["Touch Screen"]
In [142]:
# Engine Start Stop Button
Filled_DataFrame=pd.DataFrame()
main["Engine Start Stop Button"].isnull().sum()
Out[142]:
13716
In [143]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Start Stop Button"),axis=1)
In [144]:
main["Engine Start Stop Button"]=Filled_DataFrame["Engine Start Stop Button"]
In [145]:
# Alloy Wheels
Filled_DataFrame=pd.DataFrame()
main["Alloy Wheels"].isnull().sum()
Out[145]:
5901
In [146]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Alloy Wheels"),axis=1)
In [147]:
main["Alloy Wheels"]=Filled_DataFrame["Alloy Wheels"]
In [148]:
# Power Windows Rear
Filled_DataFrame=pd.DataFrame()
main["Power Windows Rear"].isnull().sum()
Out[148]:
5948
In [149]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Windows Rear"),axis=1)
In [150]:
main["Power Windows Rear"]=Filled_DataFrame["Power Windows Rear"]
In [151]:
# Wheel Covers
Filled_DataFrame=pd.DataFrame()
main["Wheel Covers"].isnull().sum()
Out[151]:
19999
In [152]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Wheel Covers"),axis=1)
In [153]:
main["Wheel Covers"]=Filled_DataFrame["Wheel Covers"]
In [154]:
# Driver Airbag
Filled_DataFrame=pd.DataFrame()
main["Driver Airbag"].isnull().sum()
Out[154]:
2930
In [155]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Driver Airbag"),axis=1)
In [156]:
main["Driver Airbag"]=Filled_DataFrame["Driver Airbag"]
In [157]:
main["Wheel Covers"].isnull().sum()
Out[157]:
0
In [158]:
# Air Conditioner
Filled_DataFrame=pd.DataFrame()
main["Air Conditioner"].isnull().sum()
Out[158]:
2235
In [159]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Air Conditioner"),axis=1)
In [160]:
main["Air Conditioner"]=Filled_DataFrame["Air Conditioner"]
In [161]:
# Power Adjustable Exterior Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Power Adjustable Exterior Rear View Mirror"].isnull().sum()
Out[161]:
3260
In [162]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Adjustable Exterior Rear View Mirror"),axis=1)
In [163]:
main["Power Adjustable Exterior Rear View Mirror"]=Filled_DataFrame["Power Adjustable Exterior Rear View Mirror"]
In [164]:
# Automatic Climate Control
Filled_DataFrame=pd.DataFrame()
main["Automatic Climate Control"].isnull().sum()
Out[164]:
10347
In [165]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Automatic Climate Control"),axis=1)
In [166]:
main["Automatic Climate Control"]=Filled_DataFrame["Automatic Climate Control"]
In [167]:
# Anti Lock Braking System
Filled_DataFrame=pd.DataFrame()
main["Anti Lock Braking System"].isnull().sum()
Out[167]:
3592
In [168]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Anti Lock Braking System"),axis=1)
In [169]:
main["Anti Lock Braking System"]=Filled_DataFrame["Anti Lock Braking System"]
In [170]:
# Fog Lights - Front
Filled_DataFrame=pd.DataFrame()
main["Fog Lights - Front"].isnull().sum()
Out[170]:
17867
In [171]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Fog Lights - Front"),axis=1)
In [172]:
main["Fog Lights - Front"]=Filled_DataFrame["Fog Lights - Front"]
In [173]:
# Power Windows Front
Filled_DataFrame=pd.DataFrame()
main["Power Windows Front"].isnull().sum()
Out[173]:
3336
In [174]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Windows Front"),axis=1)
In [175]:
main["Power Windows Front"]=Filled_DataFrame["Power Windows Front"]
In [176]:
# Passenger Airbag
Filled_DataFrame=pd.DataFrame()
main["Passenger Airbag"].isnull().sum()
Out[176]:
1574
In [177]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Passenger Airbag"),axis=1)
In [178]:
main["Passenger Airbag"]=Filled_DataFrame["Passenger Airbag"]
In [179]:
# Power Steering
Filled_DataFrame=pd.DataFrame()
main["Power Steering"].isnull().sum()
Out[179]:
8957
In [180]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Steering"),axis=1)
In [181]:
main["Power Steering"]=Filled_DataFrame["Power Steering"]
In [182]:
# Engine Type
Filled_DataFrame=pd.DataFrame()
main["Engine Type"].isnull().sum()
Out[182]:
5964
In [183]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Type"),axis=1)
main["Engine Type"]=Filled_DataFrame["Engine Type"]
In [184]:
# Emission Norm Compliance
Filled_DataFrame=pd.DataFrame()
main["Emission Norm Compliance"].isnull().sum()
Out[184]:
2344
In [185]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Emission Norm Compliance"),axis=1)
main["Emission Norm Compliance"]=Filled_DataFrame["Emission Norm Compliance"]
In [186]:
# Front Suspension
Filled_DataFrame=pd.DataFrame()
main["Front Suspension"].isnull().sum()
Out[186]:
2656
In [187]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Front Suspension"),axis=1)
main["Front Suspension"]=Filled_DataFrame["Front Suspension"]
In [188]:
# Rear Suspension
Filled_DataFrame=pd.DataFrame()
main["Rear Suspension"].isnull().sum()
Out[188]:
3030
In [189]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Suspension"),axis=1)
main["Rear Suspension"]=Filled_DataFrame["Rear Suspension"]
In [190]:
# Steering Type
Filled_DataFrame=pd.DataFrame()
main["Steering Type"].isnull().sum()
Out[190]:
14349
In [191]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Steering Type"),axis=1)
main["Steering Type"]=Filled_DataFrame["Steering Type"]
In [192]:
# Steering Column
Filled_DataFrame=pd.DataFrame()
main["Steering Column"].isnull().sum()
Out[192]:
14258
In [193]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Steering Column"),axis=1)
main["Steering Column"]=Filled_DataFrame["Steering Column"]
In [194]:
# Front Brake Type
Filled_DataFrame=pd.DataFrame()
main["Front Brake Type"].isnull().sum()
Out[194]:
2693
In [195]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Front Brake Type"),axis=1)
main["Front Brake Type"]=Filled_DataFrame["Front Brake Type"]
In [196]:
# Rear Brake Type
Filled_DataFrame=pd.DataFrame()
main["Rear Brake Type"].isnull().sum()
Out[196]:
5010
In [197]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Brake Type"),axis=1)
main["Rear Brake Type"]=Filled_DataFrame["Rear Brake Type"]
In [202]:
# Heater
Filled_DataFrame=pd.DataFrame()
main["Heater"].isnull().sum()
Out[202]:
4065
In [203]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Heater"),axis=1)
main["Heater"]=Filled_DataFrame["Heater"]
In [204]:
# Adjustable Steering
Filled_DataFrame=pd.DataFrame()
main["Adjustable Steering"].isnull().sum()
Out[204]:
12822
In [205]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Adjustable Steering"),axis=1)
main["Adjustable Steering"]=Filled_DataFrame["Adjustable Steering"]
In [206]:
# Low Fuel Warning Light
Filled_DataFrame=pd.DataFrame()
main["Low Fuel Warning Light"].isnull().sum()
Out[206]:
4155
In [207]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Low Fuel Warning Light"),axis=1)
main["Low Fuel Warning Light"]=Filled_DataFrame["Low Fuel Warning Light"]
In [208]:
# Accessory Power Outlet
Filled_DataFrame=pd.DataFrame()
main["Accessory Power Outlet"].isnull().sum()
Out[208]:
2813
In [209]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Accessory Power Outlet"),axis=1)
main["Accessory Power Outlet"]=Filled_DataFrame["Accessory Power Outlet"]
In [210]:
# Trunk Light
Filled_DataFrame=pd.DataFrame()
main["Trunk Light"].isnull().sum()
Out[210]:
43805
In [211]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Trunk Light"),axis=1)
main["Trunk Light"]=Filled_DataFrame["Trunk Light"]
In [212]:
# Rear Seat Headrest
Filled_DataFrame=pd.DataFrame()
main["Rear Seat Headrest"].isnull().sum()
Out[212]:
5122
In [213]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Seat Headrest"),axis=1)
main["Rear Seat Headrest"]=Filled_DataFrame["Rear Seat Headrest"]
In [214]:
# Adjustable Headrest
Filled_DataFrame=pd.DataFrame()
main["Adjustable Headrest"].isnull().sum()
Out[214]:
25085
In [215]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Adjustable Headrest"),axis=1)
main["Adjustable Headrest"]=Filled_DataFrame["Adjustable Headrest"]
In [216]:
# Rear Seat Centre Arm Rest
Filled_DataFrame=pd.DataFrame()
main["Rear Seat Centre Arm Rest"].isnull().sum()
Out[216]:
21040
In [217]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Seat Centre Arm Rest"),axis=1)
main["Rear Seat Centre Arm Rest"]=Filled_DataFrame["Rear Seat Centre Arm Rest"]
In [218]:
# Height Adjustable Front Seat Belts
Filled_DataFrame=pd.DataFrame()
main["Height Adjustable Front Seat Belts"].isnull().sum()
Out[218]:
46851
In [219]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Height Adjustable Front Seat Belts"),axis=1)
main["Height Adjustable Front Seat Belts"]=Filled_DataFrame["Height Adjustable Front Seat Belts"]
In [220]:
# Cup Holders-Rear
Filled_DataFrame=pd.DataFrame()
main["Cup Holders-Rear"].isnull().sum()
Out[220]:
28202
In [221]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Cup Holders-Rear"),axis=1)
main["Cup Holders-Rear"]=Filled_DataFrame["Cup Holders-Rear"]
In [222]:
# Rear AC Vents
Filled_DataFrame=pd.DataFrame()
main["Rear AC Vents"].isnull().sum()
Out[222]:
20459
In [223]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear AC Vents"),axis=1)
main["Rear AC Vents"]=Filled_DataFrame["Rear AC Vents"]
In [224]:
# Seat Lumbar Support
Filled_DataFrame=pd.DataFrame()
main["Seat Lumbar Support"].isnull().sum()
Out[224]:
25211
In [225]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Seat Lumbar Support"),axis=1)
main["Seat Lumbar Support"]=Filled_DataFrame["Seat Lumbar Support"]
In [226]:
# Cruise Control
Filled_DataFrame=pd.DataFrame()
main["Cruise Control"].isnull().sum()
Out[226]:
14583
In [227]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Cruise Control"),axis=1)
main["Cruise Control"]=Filled_DataFrame["Cruise Control"]
In [228]:
# Cruise Control
Filled_DataFrame=pd.DataFrame()
main["Smart Access Card Entry"].isnull().sum()
Out[228]:
22685
In [229]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Smart Access Card Entry"),axis=1)
main["Smart Access Card Entry"]=Filled_DataFrame["Smart Access Card Entry"]
In [230]:
# KeyLess Entry
Filled_DataFrame=pd.DataFrame()
main["KeyLess Entry"].isnull().sum()
Out[230]:
2584
In [231]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"KeyLess Entry"),axis=1)
main["KeyLess Entry"]=Filled_DataFrame["KeyLess Entry"]
In [232]:
# Engine Start/Stop Button
Filled_DataFrame=pd.DataFrame()
main["Engine Start/Stop Button"].isnull().sum()
Out[232]:
11799
In [233]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Start/Stop Button"),axis=1)
main["Engine Start/Stop Button"]=Filled_DataFrame["Engine Start/Stop Button"]
In [234]:
# Glove Box Cooling
Filled_DataFrame=pd.DataFrame()
main["Glove Box Cooling"].isnull().sum()
Out[234]:
37695
In [235]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Glove Box Cooling"),axis=1)
main["Glove Box Cooling"]=Filled_DataFrame["Glove Box Cooling"]
In [236]:
# Voice Control
Filled_DataFrame=pd.DataFrame()
main["Voice Control"].isnull().sum()
Out[236]:
27901
In [237]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Voice Control"),axis=1)
main["Voice Control"]=Filled_DataFrame["Voice Control"]
In [238]:
# Gear Shift Indicator
Filled_DataFrame=pd.DataFrame()
main["Gear Shift Indicator"].isnull().sum()
Out[238]:
52908
In [239]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Gear Shift Indicator"),axis=1)
main["Gear Shift Indicator"]=Filled_DataFrame["Gear Shift Indicator"]
In [240]:
# Tachometer
Filled_DataFrame=pd.DataFrame()
main["Tachometer"].isnull().sum()
Out[240]:
14511
In [241]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Tachometer"),axis=1)
main["Tachometer"]=Filled_DataFrame["Tachometer"]
In [242]:
# Electronic Multi-Tripmeter
Filled_DataFrame=pd.DataFrame()
main["Electronic Multi-Tripmeter"].isnull().sum()
Out[242]:
5336
In [243]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Electronic Multi-Tripmeter"),axis=1)
main["Electronic Multi-Tripmeter"]=Filled_DataFrame["Electronic Multi-Tripmeter"]
In [244]:
# Fabric Upholstery
Filled_DataFrame=pd.DataFrame()
main["Fabric Upholstery"].isnull().sum()
Out[244]:
10497
In [245]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Fabric Upholstery"),axis=1)
main["Fabric Upholstery"]=Filled_DataFrame["Fabric Upholstery"]
In [246]:
# Leather Steering Wheel
Filled_DataFrame=pd.DataFrame()
main["Leather Steering Wheel"].isnull().sum()
Out[246]:
16480
In [247]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Leather Steering Wheel"),axis=1)
main["Leather Steering Wheel"]=Filled_DataFrame["Leather Steering Wheel"]
In [248]:
# Glove Compartment
Filled_DataFrame=pd.DataFrame()
main["Glove Compartment"].isnull().sum()
Out[248]:
1627
In [249]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Glove Compartment"),axis=1)
main["Glove Compartment"]=Filled_DataFrame["Glove Compartment"]
In [250]:
# Digital Clock
Filled_DataFrame=pd.DataFrame()
main["Digital Clock"].isnull().sum()
Out[250]:
5408
In [251]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Digital Clock"),axis=1)
main["Digital Clock"]=Filled_DataFrame["Digital Clock"]
In [252]:
# Digital Odometer
Filled_DataFrame=pd.DataFrame()
main["Digital Odometer"].isnull().sum()
Out[252]:
14334
In [253]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Digital Odometer"),axis=1)
main["Digital Odometer"]=Filled_DataFrame["Digital Odometer"]
In [254]:
# Height Adjustable Driver Seat
Filled_DataFrame=pd.DataFrame()
main["Height Adjustable Driver Seat"].isnull().sum()
Out[254]:
14696
In [255]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Height Adjustable Driver Seat"),axis=1)
main["Height Adjustable Driver Seat"]=Filled_DataFrame["Height Adjustable Driver Seat"]
In [256]:
# Dual Tone Dashboard
Filled_DataFrame=pd.DataFrame()
main["Dual Tone Dashboard"].isnull().sum()
Out[256]:
27826
In [257]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Dual Tone Dashboard"),axis=1)
main["Dual Tone Dashboard"]=Filled_DataFrame["Dual Tone Dashboard"]
In [258]:
# Adjustable Headlights
Filled_DataFrame=pd.DataFrame()
main["Adjustable Headlights"].isnull().sum()
Out[258]:
1802
In [259]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Adjustable Headlights"),axis=1)
main["Adjustable Headlights"]=Filled_DataFrame["Adjustable Headlights"]
In [260]:
# Electric Folding Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Electric Folding Rear View Mirror"].isnull().sum()
Out[260]:
7675
In [261]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Electric Folding Rear View Mirror"),axis=1)
main["Electric Folding Rear View Mirror"]=Filled_DataFrame["Electric Folding Rear View Mirror"]
In [262]:
# Rear Window Wiper
Filled_DataFrame=pd.DataFrame()
main["Rear Window Wiper"].isnull().sum()
Out[262]:
23379
In [263]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Window Wiper"),axis=1)
main["Rear Window Wiper"]=Filled_DataFrame["Rear Window Wiper"]
In [264]:
# Rear Window Defogger
Filled_DataFrame=pd.DataFrame()
main["Rear Window Defogger"].isnull().sum()
Out[264]:
12448
In [265]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Window Defogger"),axis=1)
main["Rear Window Defogger"]=Filled_DataFrame["Rear Window Defogger"]
In [266]:
# Rear Window Defogger
Filled_DataFrame=pd.DataFrame()
main["Rear Spoiler"].isnull().sum()
Out[266]:
31738
In [267]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Spoiler"),axis=1)
main["Rear Spoiler"]=Filled_DataFrame["Rear Spoiler"]
In [268]:
# Sun Roof
Filled_DataFrame=pd.DataFrame()
main["Sun Roof"].isnull().sum()
Out[268]:
30497
In [269]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Sun Roof"),axis=1)
main["Sun Roof"]=Filled_DataFrame["Sun Roof"]
In [270]:
# Moon Roof
Filled_DataFrame=pd.DataFrame()
main["Moon Roof"].isnull().sum()
Out[270]:
31052
In [271]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Moon Roof"),axis=1)
main["Moon Roof"]=Filled_DataFrame["Moon Roof"]
In [272]:
# Outside Rear View Mirror Turn Indicators
Filled_DataFrame=pd.DataFrame()
main["Outside Rear View Mirror Turn Indicators"].isnull().sum()
Out[272]:
14934
In [273]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Outside Rear View Mirror Turn Indicators"),axis=1)
main["Outside Rear View Mirror Turn Indicators"]=Filled_DataFrame["Outside Rear View Mirror Turn Indicators"]
In [274]:
# Intergrated Antenna
Filled_DataFrame=pd.DataFrame()
main["Intergrated Antenna"].isnull().sum()
Out[274]:
24501
In [275]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Intergrated Antenna"),axis=1)
main["Intergrated Antenna"]=Filled_DataFrame["Intergrated Antenna"]
In [276]:
# Intergrated Antenna
Filled_DataFrame=pd.DataFrame()
main["Chrome Grille"].isnull().sum()
Out[276]:
33710
In [277]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Chrome Grille"),axis=1)
main["Chrome Grille"]=Filled_DataFrame["Chrome Grille"]
In [278]:
# Halogen Headlamps
Filled_DataFrame=pd.DataFrame()
main["Halogen Headlamps"].isnull().sum()
Out[278]:
39907
In [279]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Halogen Headlamps"),axis=1)
main["Halogen Headlamps"]=Filled_DataFrame["Halogen Headlamps"]
In [280]:
# Roof Rail
Filled_DataFrame=pd.DataFrame()
main["Roof Rail"].isnull().sum()
Out[280]:
36422
In [281]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Roof Rail"),axis=1)
main["Roof Rail"]=Filled_DataFrame["Roof Rail"]
In [282]:
# LED DRLs
Filled_DataFrame=pd.DataFrame()
main["LED DRLs"].isnull().sum()
Out[282]:
26018
In [283]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"LED DRLs"),axis=1)
main["LED DRLs"]=Filled_DataFrame["LED DRLs"]
In [284]:
# LED Taillights
Filled_DataFrame=pd.DataFrame()
main["LED Taillights"].isnull().sum()
Out[284]:
37530
In [285]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"LED Taillights"),axis=1)
main["LED Taillights"]=Filled_DataFrame["LED Taillights"]
In [286]:
# Anti-Lock Braking System
Filled_DataFrame=pd.DataFrame()
main["Anti-Lock Braking System"].isnull().sum()
Out[286]:
2045
In [287]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Anti-Lock Braking System"),axis=1)
main["Anti-Lock Braking System"]=Filled_DataFrame["Anti-Lock Braking System"]
In [288]:
# Central Locking
Filled_DataFrame=pd.DataFrame()
main["Central Locking"].isnull().sum()
Out[288]:
2651
In [289]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Central Locking"),axis=1)
main["Central Locking"]=Filled_DataFrame["Central Locking"]
In [290]:
# Power Door Locks
Filled_DataFrame=pd.DataFrame()
main["Power Door Locks"].isnull().sum()
Out[290]:
3476
In [291]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Door Locks"),axis=1)
main["Power Door Locks"]=Filled_DataFrame["Power Door Locks"]
In [292]:
# Child Safety Locks
Filled_DataFrame=pd.DataFrame()
main["Child Safety Locks"].isnull().sum()
Out[292]:
6924
In [293]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Child Safety Locks"),axis=1)
main["Child Safety Locks"]=Filled_DataFrame["Child Safety Locks"]
In [294]:
# Side Airbag-Front
Filled_DataFrame=pd.DataFrame()
main["Side Airbag-Front"].isnull().sum()
Out[294]:
32139
In [295]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Side Airbag-Front"),axis=1)
main["Side Airbag-Front"]=Filled_DataFrame["Side Airbag-Front"]
In [296]:
# Day & Night Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Day & Night Rear View Mirror"].isnull().sum()
Out[296]:
37003
In [297]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Day & Night Rear View Mirror"),axis=1)
main["Day & Night Rear View Mirror"]=Filled_DataFrame["Day & Night Rear View Mirror"]
In [298]:
# Passenger Side Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Passenger Side Rear View Mirror"].isnull().sum()
Out[298]:
2302
In [299]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Passenger Side Rear View Mirror"),axis=1)
main["Passenger Side Rear View Mirror"]=Filled_DataFrame["Passenger Side Rear View Mirror"]
In [300]:
# Rear Seat Belts
Filled_DataFrame=pd.DataFrame()
main["Rear Seat Belts"].isnull().sum()
Out[300]:
6081
In [301]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Seat Belts"),axis=1)
main["Rear Seat Belts"]=Filled_DataFrame["Rear Seat Belts"]
In [302]:
# Seat Belt Warning
Filled_DataFrame=pd.DataFrame()
main["Seat Belt Warning"].isnull().sum()
Out[302]:
15626
In [303]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Seat Belt Warning"),axis=1)
main["Seat Belt Warning"]=Filled_DataFrame["Seat Belt Warning"]
In [304]:
# Door Ajar Warning
Filled_DataFrame=pd.DataFrame()
main["Door Ajar Warning"].isnull().sum()
Out[304]:
37983
In [305]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Door Ajar Warning"),axis=1)
main["Door Ajar Warning"]=Filled_DataFrame["Door Ajar Warning"]
In [306]:
# Adjustable Seats
Filled_DataFrame=pd.DataFrame()
main["Adjustable Seats"].isnull().sum()
Out[306]:
1598
In [307]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Adjustable Seats"),axis=1)
main["Adjustable Seats"]=Filled_DataFrame["Adjustable Seats"]
In [308]:
# Engine Immobilizer
Filled_DataFrame=pd.DataFrame()
main["Engine Immobilizer"].isnull().sum()
Out[308]:
32910
In [309]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Immobilizer"),axis=1)
main["Engine Immobilizer"]=Filled_DataFrame["Engine Immobilizer"]
In [310]:
# Crash Sensor
Filled_DataFrame=pd.DataFrame()
main["Crash Sensor"].isnull().sum()
Out[310]:
3307
In [311]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Crash Sensor"),axis=1)
main["Crash Sensor"]=Filled_DataFrame["Crash Sensor"]
In [312]:
# Engine Check Warning
Filled_DataFrame=pd.DataFrame()
main["Engine Check Warning"].isnull().sum()
Out[312]:
10614
In [313]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Engine Check Warning"),axis=1)
main["Engine Check Warning"]=Filled_DataFrame["Engine Check Warning"]
In [314]:
# Automatic Headlamps
Filled_DataFrame=pd.DataFrame()
main["Automatic Headlamps"].isnull().sum()
Out[314]:
21576
In [315]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Automatic Headlamps"),axis=1)
main["Automatic Headlamps"]=Filled_DataFrame["Automatic Headlamps"]
In [316]:
# EBD
Filled_DataFrame=pd.DataFrame()
main["EBD"].isnull().sum()
Out[316]:
4208
In [317]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"EBD"),axis=1)
main["EBD"]=Filled_DataFrame["EBD"]
In [318]:
# Electronic Stability Control
Filled_DataFrame=pd.DataFrame()
main["Electronic Stability Control"].isnull().sum()
Out[318]:
41817
In [319]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Electronic Stability Control"),axis=1)
main["Electronic Stability Control"]=Filled_DataFrame["Electronic Stability Control"]
In [320]:
# Rear Camera
Filled_DataFrame=pd.DataFrame()
main["Rear Camera"].isnull().sum()
Out[320]:
11678
In [321]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Camera"),axis=1)
main["Rear Camera"]=Filled_DataFrame["Rear Camera"]
In [322]:
# ISOFIX Child Seat Mounts
Filled_DataFrame=pd.DataFrame()
main["ISOFIX Child Seat Mounts"].isnull().sum()
Out[322]:
22738
In [323]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"ISOFIX Child Seat Mounts"),axis=1)
main["ISOFIX Child Seat Mounts"]=Filled_DataFrame["ISOFIX Child Seat Mounts"]
In [324]:
# Pretensioners & Force Limiter Seatbelts
Filled_DataFrame=pd.DataFrame()
main["Pretensioners & Force Limiter Seatbelts"].isnull().sum()
Out[324]:
32582
In [325]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Pretensioners & Force Limiter Seatbelts"),axis=1)
main["Pretensioners & Force Limiter Seatbelts"]=Filled_DataFrame["Pretensioners & Force Limiter Seatbelts"]
In [326]:
# Hill Assist
Filled_DataFrame=pd.DataFrame()
main["Hill Assist"].isnull().sum()
Out[326]:
30140
In [327]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Hill Assist"),axis=1)
main["Hill Assist"]=Filled_DataFrame["Hill Assist"]
In [328]:
# Radio
Filled_DataFrame=pd.DataFrame()
main["Radio"].isnull().sum()
Out[328]:
2410
In [329]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Radio"),axis=1)
main["Radio"]=Filled_DataFrame["Radio"]
In [330]:
# Audio System Remote Control
Filled_DataFrame=pd.DataFrame()
main["Audio System Remote Control"].isnull().sum()
Out[330]:
64272
In [331]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Audio System Remote Control"),axis=1)
main["Audio System Remote Control"]=Filled_DataFrame["Audio System Remote Control"]
In [332]:
# Speakers Front
Filled_DataFrame=pd.DataFrame()
main["Speakers Front"].isnull().sum()
Out[332]:
2448
In [333]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Speakers Front"),axis=1)
main["Speakers Front"]=Filled_DataFrame["Speakers Front"]
In [334]:
# Speakers Rear
Filled_DataFrame=pd.DataFrame()
main["Speakers Rear"].isnull().sum()
Out[334]:
2389
In [335]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Speakers Rear"),axis=1)
main["Speakers Rear"]=Filled_DataFrame["Speakers Rear"]
In [336]:
# Integrated 2DIN Audio
Filled_DataFrame=pd.DataFrame()
main["Integrated 2DIN Audio"].isnull().sum()
Out[336]:
6788
In [337]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Integrated 2DIN Audio"),axis=1)
main["Integrated 2DIN Audio"]=Filled_DataFrame["Integrated 2DIN Audio"]
In [338]:
# USB & Auxiliary input
Filled_DataFrame=pd.DataFrame()
main["USB & Auxiliary input"].isnull().sum()
Out[338]:
37024
In [339]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"USB & Auxiliary input"),axis=1)
main["USB & Auxiliary input"]=Filled_DataFrame["USB & Auxiliary input"]
In [340]:
# Bluetooth Connectivity
Filled_DataFrame=pd.DataFrame()
main["Bluetooth Connectivity"].isnull().sum()
Out[340]:
5488
In [341]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Bluetooth Connectivity"),axis=1)
main["Bluetooth Connectivity"]=Filled_DataFrame["Bluetooth Connectivity"]
In [342]:
# Android Auto
Filled_DataFrame=pd.DataFrame()
main["Android Auto"].isnull().sum()
Out[342]:
23316
In [343]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Android Auto"),axis=1)
main["Android Auto"]=Filled_DataFrame["Android Auto"]
In [344]:
# Apple CarPlay
Filled_DataFrame=pd.DataFrame()
main["Apple CarPlay"].isnull().sum()
Out[344]:
23881
In [345]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Apple CarPlay"),axis=1)
main["Apple CarPlay"]=Filled_DataFrame["Apple CarPlay"]
In [346]:
# Turning Radius (Metres)
Filled_DataFrame=pd.DataFrame()
main["Turning Radius (Metres)"].isnull().sum()
Out[346]:
56190
In [347]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Turning Radius (Metres)"),axis=1)
main["Turning Radius (Metres)"]=Filled_DataFrame["Turning Radius (Metres)"]
In [348]:
# Vanity Mirror
Filled_DataFrame=pd.DataFrame()
main["Vanity Mirror"].isnull().sum()
Out[348]:
37298
In [349]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Vanity Mirror"),axis=1)
main["Vanity Mirror"]=Filled_DataFrame["Vanity Mirror"]
In [350]:
# Navigation System
Filled_DataFrame=pd.DataFrame()
main["Navigation System"].isnull().sum()
Out[350]:
35966
In [351]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Navigation System"),axis=1)
main["Navigation System"]=Filled_DataFrame["Navigation System"]
In [352]:
# Outside Temperature Display
Filled_DataFrame=pd.DataFrame()
main["Outside Temperature Display"].isnull().sum()
Out[352]:
61521
In [353]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Outside Temperature Display"),axis=1)
main["Outside Temperature Display"]=Filled_DataFrame["Outside Temperature Display"]
In [354]:
# Manually Adjustable Ext. Rear View Mirror
Filled_DataFrame=pd.DataFrame()
main["Manually Adjustable Ext. Rear View Mirror"].isnull().sum()
Out[354]:
24027
In [355]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Manually Adjustable Ext. Rear View Mirror"),axis=1)
main["Manually Adjustable Ext. Rear View Mirror"]=Filled_DataFrame["Manually Adjustable Ext. Rear View Mirror"]
In [356]:
# Power Antenna
Filled_DataFrame=pd.DataFrame()
main["Power Antenna"].isnull().sum()
Out[356]:
27849
In [357]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Power Antenna"),axis=1)
main["Power Antenna"]=Filled_DataFrame["Power Antenna"]
In [358]:
# Brake Assist
Filled_DataFrame=pd.DataFrame()
main["Brake Assist"].isnull().sum()
Out[358]:
45067
In [359]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Brake Assist"),axis=1)
main["Brake Assist"]=Filled_DataFrame["Brake Assist"]
In [360]:
# Anti-Theft Alarm
Filled_DataFrame=pd.DataFrame()
main["Anti-Theft Alarm"].isnull().sum()
Out[360]:
48915
In [361]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Anti-Theft Alarm"),axis=1)
main["Anti-Theft Alarm"]=Filled_DataFrame["Anti-Theft Alarm"]
In [362]:
# Speed Sensing Auto Door Lock
Filled_DataFrame=pd.DataFrame()
main["Speed Sensing Auto Door Lock"].isnull().sum()
Out[362]:
31430
In [363]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Speed Sensing Auto Door Lock"),axis=1)
main["Speed Sensing Auto Door Lock"]=Filled_DataFrame["Speed Sensing Auto Door Lock"]
In [364]:
# Chrome Garnish
Filled_DataFrame=pd.DataFrame()
main["Chrome Garnish"].isnull().sum()
Out[364]:
47823
In [365]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Chrome Garnish"),axis=1)
main["Chrome Garnish"]=Filled_DataFrame["Chrome Garnish"]
In [366]:
# Side Impact Beams
Filled_DataFrame=pd.DataFrame()
main["Side Impact Beams"].isnull().sum()
Out[366]:
62890
In [367]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Side Impact Beams"),axis=1)
main["Side Impact Beams"]=Filled_DataFrame["Side Impact Beams"]
In [368]:
# Drive Type
Filled_DataFrame=pd.DataFrame()
main["Drive Type"].isnull().sum()
Out[368]:
53327
In [369]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Drive Type"),axis=1)
main["Drive Type"]=Filled_DataFrame["Drive Type"]
In [370]:
# Rear Reading Lamp
Filled_DataFrame=pd.DataFrame()
main["Rear Reading Lamp"].isnull().sum()
Out[370]:
42496
In [371]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Reading Lamp"),axis=1)
main["Rear Reading Lamp"]=Filled_DataFrame["Rear Reading Lamp"]
In [372]:
# Cup Holders-Front
Filled_DataFrame=pd.DataFrame()
main["Cup Holders-Front"].isnull().sum()
Out[372]:
44164
In [373]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Cup Holders-Front"),axis=1)
main["Cup Holders-Front"]=Filled_DataFrame["Cup Holders-Front"]
In [374]:
# Leather Seats
Filled_DataFrame=pd.DataFrame()
main["Leather Seats"].isnull().sum()
Out[374]:
29188
In [375]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Leather Seats"),axis=1)
main["Leather Seats"]=Filled_DataFrame["Leather Seats"]
In [376]:
# Driving Experience Control Eco
Filled_DataFrame=pd.DataFrame()
main["Driving Experience Control Eco"].isnull().sum()
Out[376]:
44979
In [377]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Driving Experience Control Eco"),axis=1)
main["Driving Experience Control Eco"]=Filled_DataFrame["Driving Experience Control Eco"]
In [378]:
# Ventilated Seats
Filled_DataFrame=pd.DataFrame()
main["Ventilated Seats"].isnull().sum()
Out[378]:
47550
In [379]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Ventilated Seats"),axis=1)
main["Ventilated Seats"]=Filled_DataFrame["Ventilated Seats"]
In [380]:
# Tyre Pressure Monitor
Filled_DataFrame=pd.DataFrame()
main["Tyre Pressure Monitor"].isnull().sum()
Out[380]:
31789
In [381]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Tyre Pressure Monitor"),axis=1)
main["Tyre Pressure Monitor"]=Filled_DataFrame["Tyre Pressure Monitor"]
In [382]:
# Rain Sensing Wiper
Filled_DataFrame=pd.DataFrame()
main["Rain Sensing Wiper"].isnull().sum()
Out[382]:
36739
In [383]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rain Sensing Wiper"),axis=1)
main["Rain Sensing Wiper"]=Filled_DataFrame["Rain Sensing Wiper"]
In [384]:
# Turbo Charger
Filled_DataFrame=pd.DataFrame()
main["Turbo Charger"].isnull().sum()
Out[384]:
30909
In [385]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Turbo Charger"),axis=1)
main["Turbo Charger"]=Filled_DataFrame["Turbo Charger"]
In [386]:
# Air Quality Control
Filled_DataFrame=pd.DataFrame()
main["Air Quality Control"].isnull().sum()
Out[386]:
53313
In [387]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Air Quality Control"),axis=1)
main["Air Quality Control"]=Filled_DataFrame["Air Quality Control"]
In [388]:
# Traction Control
Filled_DataFrame=pd.DataFrame()
main["Traction Control"].isnull().sum()
Out[388]:
44622
In [389]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Traction Control"),axis=1)
main["Traction Control"]=Filled_DataFrame["Traction Control"]
In [390]:
# Vehicle Stability Control System
Filled_DataFrame=pd.DataFrame()
main["Vehicle Stability Control System"].isnull().sum()
Out[390]:
56095
In [391]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Vehicle Stability Control System"),axis=1)
main["Vehicle Stability Control System"]=Filled_DataFrame["Vehicle Stability Control System"]
In [392]:
# Rear Window Washer
Filled_DataFrame=pd.DataFrame()
main["Rear Window Washer"].isnull().sum()
Out[392]:
26432
In [393]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Window Washer"),axis=1)
In [394]:
main["Rear Window Washer"]=Filled_DataFrame["Rear Window Washer"]
In [395]:
# Rear Window Washer
Filled_DataFrame=pd.DataFrame()
main["Rear Window Washer"].isnull().sum()
Out[395]:
0
In [396]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Rear Window Washer"),axis=1)
In [392]:
# Gear Box
Filled_DataFrame=pd.DataFrame()
main["Gear Box"].isnull().sum()
Out[392]:
26432
In [396]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Gear Box"),axis=1)
In [397]:
main["Gear Box"]=Filled_DataFrame["Gear Box"]
In [398]:
#Follow Me Home Headlamps
Filled_DataFrame=pd.DataFrame()
main["Follow Me Home Headlamps"].isnull().sum()
Out[398]:
29197
In [399]:
k=prepare_data_for_create_dataframe(non_repitation_data).sort_values(by="Brand").apply(lambda x:fill_catagorical_values(x["Model"],x["Brand"],"Follow Me Home Headlamps"),axis=1)
In [400]:
main["Follow Me Home Headlamps"]=Filled_DataFrame["Follow Me Home Headlamps"]
In [401]:
main.columns
Out[401]:
Index(['Unnamed: 0', 'Model', 'Brand', 'Varient', 'ARAI Mileage(Km/L)',
'Engine Displacement (cc)', 'Seating Capacity', 'Boot Space (Litres)',
'Body Type', 'Fuel Type',
...
'Tyre Pressure Monitor', 'Rain Sensing Wiper', 'Turbo Charger',
'Air Quality Control', 'Traction Control',
'Vehicle Stability Control System', 'BHP', 'RPM', 'NM', 'NM_RPM'],
dtype='object', length=148)
In [402]:
main.isnull().sum()
Out[402]:
Unnamed: 0 0
Model 0
Brand 0
Varient 0
ARAI Mileage(Km/L) 0
..
Vehicle Stability Control System 0
BHP 0
RPM 0
NM 0
NM_RPM 0
Length: 148, dtype: int64
In [6]:
pd.set_option("display.max_rows",None)
pd.DataFrame(main.isnull().any())
Out[6]:
| 0 | |
|---|---|
| Unnamed: 0 | False |
| Unnamed: 0.1 | False |
| Model | False |
| Brand | False |
| Varient | False |
| ARAI Mileage(Km/L) | False |
| Engine Displacement (cc) | False |
| Seating Capacity | False |
| Boot Space (Litres) | False |
| Body Type | False |
| Fuel Type | False |
| No. of cylinder | False |
| TransmissionType | False |
| Fuel Tank Capacity | False |
| Multi-function Steering Wheel | False |
| Touch Screen | False |
| Engine Start Stop Button | False |
| Alloy Wheels | False |
| Power Windows Rear | False |
| Wheel Covers | False |
| Driver Airbag | False |
| Air Conditioner | False |
| Power Adjustable Exterior Rear View Mirror | False |
| Automatic Climate Control | False |
| Anti Lock Braking System | False |
| Fog Lights - Front | False |
| Power Windows Front | False |
| Passenger Airbag | False |
| Power Steering | False |
| Engine Type | False |
| Gear Box | False |
| Emission Norm Compliance | False |
| Front Suspension | False |
| Rear Suspension | False |
| Steering Type | False |
| Steering Column | False |
| Front Brake Type | False |
| Rear Brake Type | False |
| Length (mm) | False |
| Width (mm) | False |
| Height (mm) | False |
| Heater | False |
| Adjustable Steering | False |
| Low Fuel Warning Light | False |
| Accessory Power Outlet | False |
| Trunk Light | False |
| Rear Seat Headrest | False |
| Adjustable Headrest | False |
| Rear Seat Centre Arm Rest | False |
| Height Adjustable Front Seat Belts | False |
| Cup Holders-Rear | False |
| Rear AC Vents | False |
| Seat Lumbar Support | False |
| Cruise Control | False |
| Smart Access Card Entry | False |
| KeyLess Entry | False |
| Engine Start/Stop Button | False |
| Glove Box Cooling | False |
| Voice Control | False |
| Gear Shift Indicator | False |
| Tachometer | False |
| Electronic Multi-Tripmeter | False |
| Fabric Upholstery | False |
| Leather Steering Wheel | False |
| Glove Compartment | False |
| Digital Clock | False |
| Digital Odometer | False |
| Height Adjustable Driver Seat | False |
| Dual Tone Dashboard | False |
| Adjustable Headlights | False |
| Electric Folding Rear View Mirror | False |
| Rear Window Wiper | False |
| Rear Window Washer | False |
| Rear Window Defogger | False |
| Rear Spoiler | False |
| Sun Roof | False |
| Moon Roof | False |
| Outside Rear View Mirror Turn Indicators | False |
| Intergrated Antenna | False |
| Chrome Grille | False |
| Halogen Headlamps | False |
| Roof Rail | False |
| LED DRLs | False |
| LED Taillights | False |
| Anti-Lock Braking System | False |
| Central Locking | False |
| Power Door Locks | False |
| Child Safety Locks | False |
| Side Airbag-Front | False |
| Day & Night Rear View Mirror | False |
| Passenger Side Rear View Mirror | False |
| Rear Seat Belts | False |
| Seat Belt Warning | False |
| Door Ajar Warning | False |
| Adjustable Seats | False |
| Engine Immobilizer | False |
| Crash Sensor | False |
| Engine Check Warning | False |
| Automatic Headlamps | False |
| EBD | False |
| Electronic Stability Control | False |
| Follow Me Home Headlamps | False |
| Rear Camera | False |
| ISOFIX Child Seat Mounts | False |
| Pretensioners & Force Limiter Seatbelts | False |
| Hill Assist | False |
| Radio | False |
| Audio System Remote Control | False |
| Speakers Front | False |
| Speakers Rear | False |
| Integrated 2DIN Audio | False |
| USB & Auxiliary input | False |
| Bluetooth Connectivity | False |
| Android Auto | False |
| Apple CarPlay | False |
| Place | False |
| option | False |
| Price | False |
| City Mileage(Km/L) | False |
| Turning Radius (Metres) | False |
| Vanity Mirror | False |
| Navigation System | False |
| Outside Temperature Display | False |
| Manually Adjustable Ext. Rear View Mirror | False |
| Power Antenna | False |
| Brake Assist | False |
| Anti-Theft Alarm | False |
| Speed Sensing Auto Door Lock | False |
| Chrome Garnish | False |
| Side Impact Beams | False |
| Drive Type | False |
| Rear Reading Lamp | False |
| Cup Holders-Front | False |
| Leather Seats | False |
| Driving Experience Control Eco | False |
| Ventilated Seats | False |
| Tyre Pressure Monitor | False |
| Rain Sensing Wiper | False |
| Turbo Charger | False |
| Air Quality Control | False |
| Traction Control | False |
| Vehicle Stability Control System | False |
| BHP | False |
| RPM | False |
| NM | False |
| NM_RPM | False |
In [7]:
main.to_csv("C://Users//BANAR//Desktop//DataScienceProjects//CarpricePrediction//final_Preprocessed_datas2.csv")